## Output to TFRecord
****
### possible examples
[tf tutorials example](https://www.tensorflow.org/tutorials/load_data/tf_records) <br>
[mnist to tfrecords](https://docs.azuredatabricks.net/_static/notebooks/deep-learning/mnist-tensorflow-to-tfrecords.html) <br>
[]() <br>
[]() <br>

****
## Tested with:
```python
tensorflow==2.0.0rc0
```

In [7]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
# tf.enable_eager_execution()  # tf 2.0 no need

import os
import time
import glob
import math
import multiprocessing as mp
import itertools
import hashlib
import tempfile

import numpy as np
import openslide
from skimage.filters import threshold_otsu

import PIL

import IPython.display as display

# convert a value to a type compatible with tf.Example

def _bytes_feature(value):
    """Returns a bytes_list from a string / byte.

    Args:
        value: a bytes / str object, or an eager string tensor
            (e.g. the result of tf.io.read_file).

    Returns:
        tf.train.Feature whose bytes_list holds the single value.
    """
    # BytesList won't unpack a string from an EagerTensor, so pull out the
    # underlying Python bytes first.
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy()
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
    """Wrap one float / double in a tf.train.Feature carrying a float_list."""
    single_float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=single_float_list)

def _int64_feature(value):
    """Wrap one bool / enum / int / uint in a tf.train.Feature carrying an int64_list."""
    single_int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=single_int64_list)

In [8]:
# Directory holding the patch images; create it if it does not exist yet.
output_dir = '../../DigiPath_MLTK_data/out_to_test'
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

OUTPUT_PATH = output_dir
data_dir = '../../DigiPath_MLTK_data/Aperio'
files_list = ['CMU-1-Small-Region.svs', 'CMU-1.svs']
SVS = os.path.join(data_dir, files_list[0])
print('Working with file:', SVS, '\nOutput path:', OUTPUT_PATH)

# Count the image files already present in the output directory.
# out_dir refers to the same directory as output_dir; derive it instead of
# repeating the literal so the two cannot drift apart.
out_dir = output_dir
file_types = ['.jpg']
# NOTE: files_list is rebound here, from the slide file names above to the
# full paths of the images found in out_dir.
files_list = []
for maybe_file in os.listdir(out_dir):
    full_path = os.path.join(out_dir, maybe_file)
    if os.path.isfile(full_path):
        _, f_ext = os.path.splitext(maybe_file)
        if f_ext in file_types:
            files_list.append(full_path)

print(len(files_list))

Working with file: ../../DigiPath_MLTK_data/Aperio/CMU-1-Small-Region.svs 
Output path: ../../DigiPath_MLTK_data/out_to_test
999


In [9]:
def get_adjcent_segmented_length_fence_array(segment_length, length):
    """Lay out adjacent, equally sized segments along one axis.

    Args:
        segment_length: size of every segment.
        length: extent of the axis being divided.
            NOTE(review): the "1 +" in the remainder suggests length is treated
            as the last valid index rather than a count - confirm with callers.

    Returns:
        dict with keys:
            'fence_array': (n_fenced, 2) int array, one row per segment holding
                           its inclusive [first, last] positions
            'n_fenced':    number of segments
            'n_remain':    1 + length % segment_length, or 0 when that value
                           equals segment_length (one more segment is added)
            'padding':     position of the first segment, n_remain // 2
    """
    n_fenced, leftover = divmod(length, segment_length)
    n_remain = leftover + 1
    if n_remain == segment_length:
        # The leftover is exactly one more full segment: nothing remains.
        n_fenced += 1
        n_remain = 0
    paddit = n_remain // 2

    # Every segment starts right after the previous one ends, so the start
    # positions are an arithmetic progression beginning at the padding.
    fence_array = np.zeros((n_fenced, 2)).astype(int)
    fence_array[:, 0] = paddit + segment_length * np.arange(n_fenced)
    fence_array[:, 1] = fence_array[:, 0] + segment_length - 1

    return {
        'fence_array': fence_array,
        'n_fenced': n_fenced,
        'n_remain': n_remain,
        'padding': paddit,
    }

# Demo: split an axis that is 3 short of four full patches and print the result.
segment_length = 224
length = segment_length*4 - 3

print('\tget_adjcent_segmented_length_fence_array')
print('\tn pixels: %i patch_length: %i\n'%(length, segment_length))
fences_dict = get_adjcent_segmented_length_fence_array(segment_length, length)
for k, v in fences_dict.items():
    if not isinstance(v, np.ndarray):
        # Scalar entries are printed on a single line.
        print('%20s: %s'%(k,v))
        continue

    print('%20s:'%(k))
    for v_idx in range(v.shape[0]):
        fence_start, fence_end = v[v_idx,0], v[v_idx,1]
        # Every fence must span exactly segment_length positions (inclusive).
        if fence_end - fence_start == segment_length - 1:
            print('%24s, %6i'%(fence_start, fence_end))
        else:
            print('%24s, %6i   Bug Bug'%(fence_start, fence_end))


	get_adjcent_segmented_length_fence_array
	n pixels: 893 patch_length: 224

         fence_array:
                     111,    334
                     335,    558
                     559,    782
            n_fenced: 3
            n_remain: 222
             padding: 111


In [10]:
# Create a dictionary with features that may be relevant.
def image_example(image_string, label):
    """Build a tf.train.Example for one encoded JPEG and its integer label.

    The JPEG is decoded only to read its shape; the stored 'image_raw'
    feature is the encoded bytes exactly as passed in.
    """
    decoded_shape = tf.image.decode_jpeg(image_string).shape
    height = decoded_shape[0]
    width = decoded_shape[1]
    depth = decoded_shape[2]

    features = tf.train.Features(feature={
        'height': _int64_feature(height),
        'width': _int64_feature(width),
        'depth': _int64_feature(depth),
        'label': _int64_feature(label),
        'image_raw': _bytes_feature(image_string),
    })
    return tf.train.Example(features=features)

def raw_jpg_to_tfrecord(image_labels_dict, file_name='slide_images.tfrecords'):
    """Write one tf.train.Example per JPEG file into a single TFRecord file.

    Args:
        image_labels_dict: dict mapping JPEG file path -> integer label.
        file_name: path of the TFRecord file to create.
    """
    with tf.io.TFRecordWriter(file_name) as writer:
        for filename, label in image_labels_dict.items():
            # Read through a context manager so each image file handle is
            # closed promptly instead of being left to the garbage collector.
            with open(filename, 'rb') as image_file:
                image_string = image_file.read()
            tf_example = image_example(image_string, label)
            writer.write(tf_example.SerializeToString())


In [11]:
# Timing test: pack ~1000 224x224 jpg patches into one tfrecord file.
start_time = time.time()
out_test_dir = '../../DigiPath_MLTK_data/out_to_test'

# Gather the full path of every .jpg in the directory, reporting any repeat.
jpg_list = []
for f in os.listdir(out_test_dir):
    _, f_ext = os.path.splitext(f)
    if f_ext != '.jpg':
        continue
    full_name = os.path.join(out_test_dir, f)
    if full_name not in jpg_list:
        jpg_list.append(full_name)
    else:
        print('duplicate name: ???', full_name)

# Label each file with its position in the list.
image_labels = {}
for file_number, jpg_name in enumerate(jpg_list):
    image_labels[jpg_name] = file_number

# Spot-check one entry before writing.
file_seq_number = 10
sample_name = jpg_list[file_seq_number]
print('is file:', os.path.isfile(sample_name))
print(sample_name, '\n', len(image_labels),
      image_labels[sample_name])

raw_jpg_to_tfrecord(image_labels_dict=image_labels,
                    file_name='slide_images_test.tfrecords')

run_time = time.time() - start_time
print('%80s: %i'%('jpg_list', len(jpg_list)))
print('%80s: %i'%('image_labels', len(image_labels)),
      '\n%80s: %0.3f'%('run_time', run_time))


is file: True
../../DigiPath_MLTK_data/out_to_test/CMU-1_row_9000_9223_col_32137_32360.jpg 
 999 10
                                                                        jpg_list: 999
                                                                    image_labels: 999 
                                                                        run_time: 0.877


#### typical
                                                                      jpg_list: 999
                                                                  image_labels: 999 
                                                                      run_time: 1.031

In [12]:
"""
small_name = /
    file_name_base + '_row_%i_%i'%(row_fence_array[row][0], row_fence_array[row][1])
small_name += /
    '_col_%i_%i%s'%(col_fence_array[col][0], col_fence_array[col][1], file_ext)
"""
def get_patch_name_from_row_col(row, col, base_name='patch', file_ext='.jpg'):
    """Compose a patch file name that encodes its row and column extents.

    Usage:
        patch_name = get_patch_name_from_row_col(row, col, base_name='patch',
                                                 file_ext='.jpg')

    Args:
        row: indexable pair (first_row, last_row).
        col: indexable pair (first_col, last_col).
        base_name: prefix placed before the '_row_' part.
        file_ext: file extension, with or without the leading dot; an empty
            string or None yields a name with no extension.

    Returns:
        '<base_name>_row_<r0>_<r1>_col_<c0>_<c1><file_ext>'
    """
    file_ext = file_ext or ''
    # startswith (rather than file_ext[0]) so an empty extension does not raise.
    if file_ext and not file_ext.startswith('.'):
        file_ext = '.' + file_ext
    patch_name = base_name + '_row_%i_%i'%(row[0], row[1])
    patch_name += '_col_%i_%i%s'%(col[0], col[1], file_ext)

    return patch_name

def get_row_col_from_patch_name(fname):
    """Recover the row / column extents encoded in a patch file name.

    Usage:
        row_col_dict = get_row_col_from_patch_name(fname)

    Args:
        fname: patch file name or path shaped like
            '<base_name>_row_<r0>_<r1>_col_<c0>_<c1><ext>'.

    Returns:
        dict with keys:
            'base_name': the part of the name before the first row/col label
                         (underscores inside the base name are preserved)
            'file_ext':  the extension including the leading dot
            'row':       int array of the two values following 'row'
            'col':       int array of the two values following 'col'

    Raises:
        ValueError: if the tokens following a label are not integers.
    """
    row_label = 'row'
    col_label = 'col'
    base_name, file_ext = os.path.splitext(os.path.split(fname)[1])
    parts_list = base_name.split('_')

    found = {row_label: [], col_label: []}
    first_label_idx = None
    for i, part in enumerate(parts_list):
        # Only accept a label that is followed by both of its values.
        if part in found and i + 2 < len(parts_list):
            found[part].extend(parts_list[i + 1:i + 3])
            if first_label_idx is None:
                first_label_idx = i

    # Everything before the first label is the base name; re-join it so a
    # base name that itself contains underscores survives the round trip.
    if first_label_idx:
        name_prefix = '_'.join(parts_list[:first_label_idx])
    else:
        name_prefix = parts_list[0]

    # The builtin int replaces the np.int alias, which newer NumPy releases
    # no longer provide.
    row = np.array(found[row_label]).astype(int)
    col = np.array(found[col_label]).astype(int)
    return {'base_name': name_prefix, 'file_ext': file_ext, 'row': row, 'col': col }

# Round-trip check: parse a few randomly chosen patch names and rebuild them;
# the two printed names should be identical.
n_samples = 5
# Draw indices from the actual list length so a directory with fewer (or
# more) files than expected is still sampled safely.
for n in np.random.randint(0, len(jpg_list), n_samples):
    _, one_name = os.path.split(jpg_list[n])
    print(one_name)
    rc_dict = get_row_col_from_patch_name(fname=jpg_list[n])

    bname = get_patch_name_from_row_col(rc_dict['row'],
                                        rc_dict['col'],
                                        rc_dict['base_name'],
                                        rc_dict['file_ext'])
    print(bname)
    print(rc_dict['row'], rc_dict['col'])
    print(rc_dict['base_name'], '\t\t', rc_dict['file_ext'], '\n')

CMU-1_row_11912_12135_col_31017_31240.jpg
CMU-1_row_11912_12135_col_31017_31240.jpg
[11912 12135] [31017 31240]
CMU-1 		 .jpg 

CMU-1_row_11464_11687_col_31017_31240.jpg
CMU-1_row_11464_11687_col_31017_31240.jpg
[11464 11687] [31017 31240]
CMU-1 		 .jpg 

CMU-1_row_2728_2951_col_28105_28328.jpg
CMU-1_row_2728_2951_col_28105_28328.jpg
[2728 2951] [28105 28328]
CMU-1 		 .jpg 

CMU-1_row_3848_4071_col_28777_29000.jpg
CMU-1_row_3848_4071_col_28777_29000.jpg
[3848 4071] [28777 29000]
CMU-1 		 .jpg 

CMU-1_row_3176_3399_col_27881_28104.jpg
CMU-1_row_3176_3399_col_27881_28104.jpg
[3176 3399] [27881 28104]
CMU-1 		 .jpg 

