# Refactor module & rename: *svs_file_to_patches_tfrecord(...)*
## Patch names encode the case ID, x/y location, and class label

<img src='./images/file_name_coding.png' />

```python
# Each dict holds the fields of one patch file name.
# This one encodes to: d83cc7d1c94_100_340_dermis.jpg
patch_image_name_dict_0 = {'case_id': 'd83cc7d1c94', 
                         'location_x': 100, 
                         'location_y': 340, 
                         'class_label': 'dermis', 
                         'file_type': '.jpg'}

# Same case and label at a different location.
# This one encodes to: d83cc7d1c94_200_440_dermis.jpg
patch_image_name_dict_1 = {'case_id': 'd83cc7d1c94', 
                         'location_x': 200, 
                         'location_y': 440, 
                         'class_label': 'dermis', 
                         'file_type': '.jpg'}

```

In [1]:
import os
import warnings
# list the contents of the python source directory (path is relative to the notebook)
os.listdir('../src/python')

['digipath_toolkit.pyc',
 '__pycache__',
 'digipath_mltk.py',
 'digipath_toolkit.py',
 'openslide_2_tfrecord.py']

In [25]:
"""
    new functions added Nov 22 to ensure that file names can be decoded back into their x and y locations
    
"""
def patch_name_parts_limit(name_str, space_replacer=None):
    """ Usage:  part_name = patch_name_parts_limit(name_str, <space_replacer>)
                sanitize one part of a patch file name so that the assembled
                name is a valid file name and can be decoded with
                patch_name_to_dict
    Args:
        name_str:       string representation for case_id or class_label or file_extension
        space_replacer: optional python str substituted for every space;
                        ignored unless it is a str. The replacer is sanitized
                        by the same rules as the rest of the string.

    Returns:
        part_name:      name_str string with spaces removed, reserved characters removed
                        and underscores replaced with hyphens
    """
    # swap spaces for the replacer only when a usable (str) replacer was given
    if isinstance(space_replacer, str):
        name_str = name_str.replace(' ', space_replacer)

    # any space still present (none given, or the replacer contained one) is dropped
    spaceless = name_str.replace(' ', '')

    # characters that are not allowed in file names
    forbidden = set('/\\?%*:|"<>')

    # single pass: skip forbidden characters and turn '_' into '-' so that the
    # underscore stays reserved as the field separator of the patch file name
    return ''.join('-' if ch == '_' else ch
                   for ch in spaceless
                   if ch not in forbidden)


def patch_name_parts_clean_with_warning(file_name_base, class_label):
    """ Usage:  name_base_clean, class_label_clean = patch_name_parts_clean_with_warning(name_base, class_label)
                sanitize the file name base and the class label with
                patch_name_parts_limit so that they may be decoded
                - warn the user about every input that had to be changed
    Args:
        file_name_base:     file name string
        class_label:        class_id

    Returns:
        name_base_clean:    file_name_base as returned by patch_name_parts_limit
        class_label_clean:  class_label as returned by patch_name_parts_limit

    Warnings:               (if names are changed)
        UserWarning:        Input parameter changed

    """
    notice = 'Input parameter changed.\t(for name readback decoding)'

    def change_report(given, cleaned):
        # text of the warning: the notice, the value passed in, the value used
        return '\n%s\nparameter:\t%s\nchanged to:\t%s\n' % (notice, given, cleaned)

    # file name base first; it is reported before the class label is touched
    name_base_clean = patch_name_parts_limit(file_name_base)
    if name_base_clean != file_name_base:
        warnings.warn(change_report(file_name_base, name_base_clean))

    # then the class label, with its own separate warning
    class_label_clean = patch_name_parts_limit(class_label)
    if class_label_clean != class_label:
        warnings.warn(change_report(class_label, class_label_clean))

    return name_base_clean, class_label_clean

# demo inputs: both contain spaces and reserved characters, the first also underscores
f_name_base = 'Duck_tpye [43] {null_stuff} *>? brodaglio.pip_pip_hooray'
c_label = 'funny-funny label *'


# both inputs get changed, so one warning is issued for each of them
f, c = patch_name_parts_clean_with_warning(f_name_base, c_label)

Input parameter changed.	(for name readback decoding)
parameter:	Duck_tpye [43] {null_stuff} *>? brodaglio.pip_pip_hooray
changed to:	Duck-tpye[43]{null-stuff}brodaglio.pip-pip-hooray

Input parameter changed.	(for name readback decoding)
parameter:	funny-funny label *
changed to:	funny-funnylabel



In [7]:
# %load ../src/python/digipath_toolkit.py
import os
import sys

import numpy as np

def dict_to_patch_name(patch_image_name_dict):
    """ Usage:
    patch_name = dict_to_patch_name(patch_image_name_dict) 
    
    Build the patch file name  <case_id>_<location_x>_<location_y>_<class_label><file_type>
    from its fields. The input dict is read only, it is never modified.
    
    Args:
        patch_image_name_dict:  {'case_id': 'd83cc7d1c94', 
                                 'location_x': 100, 
                                 'location_y': 340, 
                                 'class_label': 'dermis', 
                                 'file_type': '.jpg' }
                                 file_type may be given with or without the
                                 leading dot; an empty file_type gives a name
                                 without extension.
        
    Returns:
        patch_name:     file name (without directory path)
    """
    file_type = patch_image_name_dict['file_type']

    # normalize on a local copy so the caller's dict is left untouched;
    # the emptiness check keeps an empty file_type from raising or adding a bare '.'
    if file_type and not file_type.startswith('.'):
        file_type = '.' + file_type

    # locations are written as integers, the fields are joined by underscores
    return '%s_%i_%i_%s%s' % (patch_image_name_dict['case_id'],
                              patch_image_name_dict['location_x'],
                              patch_image_name_dict['location_y'],
                              patch_image_name_dict['class_label'],
                              file_type)


def patch_name_to_dict(patch_file_name):
    """ Usage:
    patch_image_name_dict = patch_name_to_dict(patch_file_name)
    
    Decode a patch file name of the form
    <case_id>_<location_x>_<location_y>_<class_label>.<file_type>
    Only the last dot separates the file type, so case_id and class_label
    may themselves contain dots. They must not contain underscores.
    
    Args:
        patch_file_name:    file name as created by dict_to_patch_name()
                            (leading and trailing whitespace is ignored)
        
    Returns:
        patch_image_name_dict:  {'case_id': field[0], 
                                 'location_x': int(field[1]), 
                                 'location_y': int(field[2]), 
                                 'class_label': field[3], 
                                 'file_type': '.' + extension }
                                 file_type is '' when the name has no extension
    """
    # split the extension off at the LAST dot only
    stem, dot, extension = patch_file_name.strip().rpartition('.')
    if not dot:
        # no dot at all: rpartition returns the whole name in the last slot
        stem, extension = extension, ''

    name_field_list = stem.split('_')

    patch_image_name_dict = {'case_id': name_field_list[0],
                             'location_x': int(name_field_list[1]),
                             'location_y': int(name_field_list[2]),
                             'class_label': name_field_list[3],
                             'file_type': dot + extension}

    return patch_image_name_dict


Overwriting ../src/python/digipath_toolkit.py


In [5]:
# round trip check: encode the fields into a file name, decode the name again
patch_image_name_dict = {'case_id': 'd83cc7d1c94', 
                         'location_x': 100, 
                         'location_y': 340, 
                         'class_label': 'dermis', 
                         'file_type': '.jpg'}

patch_file_name = dict_to_patch_name(patch_image_name_dict)
print(patch_file_name)
print('\n\t\treturned dict:\n')
patch_image_name_dict_II = patch_name_to_dict(patch_file_name)
# print each decoded value next to the original value for a visual comparison
for k, v in patch_image_name_dict_II.items():
    print('%30s: %20s == %s'%(k, str(v), str(patch_image_name_dict[k])))

d83cc7d1c94_100_340_dermis.jpg

		returned dict:

                       case_id:          d83cc7d1c94 == d83cc7d1c94
                    location_x:                  100 == 100
                    location_y:                  340 == 340
                   class_label:               dermis == dermis
                     file_type:                 .jpg == .jpg


In [6]:

# second round trip check, same case and label at a different location
patch_image_name_dict_1 = {'case_id': 'd83cc7d1c94', 
                         'location_x': 200, 
                         'location_y': 440, 
                         'class_label': 'dermis', 
                         'file_type': '.jpg'}

patch_file_name_1 = dict_to_patch_name(patch_image_name_dict_1)
print(patch_file_name_1)
print('\n\t\treturned dict:\n')
patch_image_name_dict_1_r = patch_name_to_dict(patch_file_name_1)
# print each decoded value next to the original value for a visual comparison
for k, v in patch_image_name_dict_1_r.items():
    print('%30s: %20s == %s'%(k, str(v), str(patch_image_name_dict_1[k])))

d83cc7d1c94_200_440_dermis.jpg

		returned dict:

                       case_id:          d83cc7d1c94 == d83cc7d1c94
                    location_x:                  200 == 200
                    location_y:                  440 == 440
                   class_label:               dermis == dermis
                     file_type:                 .jpg == .jpg
