This notebook makes a few changes to the dataset. The most important is Adaptive Histogram Equalization. The cells below take files from `FABRICATED_TRAIN_DIR` and `FABRICATED_TEST_DIR` (defined below) and create all the various datasets, which are saved within the directories listed in `FABRICATED_DIRS`. All of these processed images will also be saved in `NEW_TRAIN_DIR`, which can be used when we actually train our model.

In [17]:
# Adaptive histogram equalization
import cv2
import os, ntpath, re, shutil
from PIL import Image, ImageOps
import matplotlib.pyplot as plt
import time


In [2]:
# Relative path to the project's data folder
DATA_DIRECTORY = os.path.join('..', '..', 'data')

# Everything this notebook reads and writes lives under this folder
FABRICATED_DATA = os.path.join(DATA_DIRECTORY, 'fabricated', 'asl_alphabet')

# NOTE: despite the name, this is the path of the zip *file* written by zip_all()
ZIP_DIRECTORY = os.path.join(DATA_DIRECTORY, 'asl_alphabet.zip')

# Input folders
FABRICATED_TRAIN_DIR = os.path.join(FABRICATED_DATA, 'split_asl_alphabet_train')
FABRICATED_TEST_DIR = os.path.join(FABRICATED_DATA, 'split_asl_alphabet_test')

# Output folder shared by every variant of the training images
NEW_TRAIN_DIR = os.path.join(FABRICATED_DATA, 'processed_asl_train')

# Output folders for the test images, one per variant
NEW_UNEDITED_TEST_DIR = os.path.join(FABRICATED_DATA, 'processed_original_test')
NEW_GRAYSCALE_TEST_DIR = os.path.join(FABRICATED_DATA, 'processed_grayscale_test')
NEW_GRAYSCALE_AHE_TEST_DIR = os.path.join(FABRICATED_DATA, 'processed_ahe_grayscale_asl_test')
NEW_GRAYSCALE_AHE_NEG_TEST_DIR = os.path.join(FABRICATED_DATA, 'processed_ahe_grayscale_inverted_asl_test')
NEW_RGB_AHE_TEST_DIR = os.path.join(FABRICATED_DATA, 'processed_rgb_ahe_test')
NEW_RGB_AHE_INVERTED_TEST_DIR = os.path.join(FABRICATED_DATA, 'processed_inverted_rgb_ahe_test')


# Used to quickly create (and delete) all of these directories
FABRICATED_DIRS = [
    NEW_TRAIN_DIR,
    NEW_GRAYSCALE_AHE_TEST_DIR,
    NEW_GRAYSCALE_AHE_NEG_TEST_DIR,
    NEW_UNEDITED_TEST_DIR,
    NEW_GRAYSCALE_TEST_DIR,
    NEW_RGB_AHE_TEST_DIR,
    NEW_RGB_AHE_INVERTED_TEST_DIR,
]

# Used to declaratively define the relationships of where we will be saving images:
# each entry maps one input folder to the output folder of every enhancement.
DIRECTORY_MAPPINGS = [
    # for the training directory, we just put all of the processed/original images in there
    {
        'orig_dirname': FABRICATED_TRAIN_DIR,
        'new_dirnames': {
            'ahe_grayscale': NEW_TRAIN_DIR,
            'ahe_grayscale_inverted': NEW_TRAIN_DIR,
            'original': NEW_TRAIN_DIR,
            'grayscale': NEW_TRAIN_DIR,
            'ahe': NEW_TRAIN_DIR,
            'ahe_inverted': NEW_TRAIN_DIR,
        },
    },
    # for test images, we create a separate directory for each type of preprocessing
    {
        'orig_dirname': FABRICATED_TEST_DIR,
        'new_dirnames': {
            'ahe_grayscale': NEW_GRAYSCALE_AHE_TEST_DIR,
            'ahe_grayscale_inverted': NEW_GRAYSCALE_AHE_NEG_TEST_DIR,
            'original': NEW_UNEDITED_TEST_DIR,
            'grayscale': NEW_GRAYSCALE_TEST_DIR,
            'ahe': NEW_RGB_AHE_TEST_DIR,
            'ahe_inverted': NEW_RGB_AHE_INVERTED_TEST_DIR,
        },
    },
]



In [3]:
# https://stackoverflow.com/questions/141291/how-to-list-only-top-level-directories-in-python
def get_real_directories(directory):
    """Return the names of the folders directly inside ``directory``.

    Plain files are ignored and nested folders are not descended into.
    """
    # os.walk yields (dirpath, dirnames, filenames) tuples; the first tuple
    # describes ``directory`` itself, so its dirnames are the top-level folders.
    _top, folder_names, _file_names = next(os.walk(directory))
    return folder_names


In [4]:
# Names of the sign folders found directly inside the training directory
# (A, B, C, D, DELETE, etc). The same names are used for the output folders
# and for the matching folders inside the test directory.
ASL_FOLDERS = get_real_directories(FABRICATED_TRAIN_DIR)

In [5]:
## Settings
# When True, process_and_save_each_image_in_path stops after the first file of each sign folder
TEST_RUN = False
# When True, zip_all() is run at the end of the notebook
ZIP_FOLDERS = False
DELETE_FOLDERS = True # Delete/Empty folders before starting
# Stop after this many files per sign folder; None never equals the file counter, so all files are processed
MAX_FILES = None
# When True, enhance_and_save_images prints the seconds spent on each sign
VERBOSE = True

# Names of the image variants to generate; process_and_save_each_image_in_path
# checks membership in this list. Comment a name out to skip that variant.
PROCESSING_STEPS_TO_USE = [
    'ahe_grayscale',
    # 'ahe_grayscale_inverted',
    'original',
    # 'grayscale',
    'ahe',
    # 'ahe_inverted',
]

In [6]:
def delete_folders():
    """Delete every output directory listed in ``FABRICATED_DIRS``.

    Best effort: a directory that does not exist yet is skipped silently, and
    any other failure is reported and skipped, so one bad directory never
    prevents the remaining ones from being removed.
    """
    for _dir in FABRICATED_DIRS:
        try:
            shutil.rmtree(_dir)
        except FileNotFoundError:
            # nothing to do here - the directory just doesn't exist yet.
            # Keep going: the other directories may still need deleting.
            continue
        except OSError as err:
            print('could not delete {}: {}'.format(_dir, err))

In [7]:
def create_folders_with_letter(letter):
    """Create the ``letter`` sub-folder inside every directory of ``FABRICATED_DIRS``.

    Every directory is attempted even if an earlier one already has the
    sub-folder, so a partially populated output tree gets completed.

    Raises:
        FileExistsError: after all directories were attempted, if at least one
            of the sub-folders already existed.
    """
    already_existing = []
    for _dir in FABRICATED_DIRS:
        dir_with_letter = os.path.join(_dir, letter)
        try:
            os.makedirs(dir_with_letter)
        except FileExistsError:
            # remember it, but still create the folder in the remaining directories
            already_existing.append(dir_with_letter)

    if already_existing:
        raise FileExistsError(
            'folders already exist: {}'.format(', '.join(already_existing))
        )

In [8]:
def create_folders():
    """Create the output folders for every sign in ``ASL_FOLDERS``.

    Calls ``create_folders_with_letter`` once per sign and prints how many
    signs succeeded and how many failed. Only filesystem errors (``OSError``,
    which includes ``FileExistsError``) are counted as failures; anything
    else, such as a keyboard interrupt, is allowed to propagate.
    """
    successfully_created = 0
    unsuccessfully_created = 0

    for sign_name in ASL_FOLDERS:
        try:
            create_folders_with_letter(sign_name)
        except OSError:
            unsuccessfully_created += 1
        else:
            successfully_created += 1

    print('successfully created {} sets of folders'.format(successfully_created))
    print('unsuccessfully created {} sets of folders (probably already existed)'.format(unsuccessfully_created))

In [9]:
def create_filename_for_fabricated_image(original_filename, enhancement):
    """Insert ``enhancement`` between a filename's body and its extension.

    Args:
        original_filename: name of the source file, e.g. ``'A.jpg'``.
        enhancement: label of the processing step, e.g. ``'ahe'``.

    Returns:
        The new filename, e.g. ``'A_ahe.jpg'``. A name without an extension
        simply gets the label appended (``'README'`` -> ``'README_ahe'``).
    """
    # splitext keeps the dot with the extension and returns '' when there is
    # none, so names without an extension no longer lose their body.
    file_body, filename_extension = os.path.splitext(original_filename)
    return '{}_{}{}'.format(file_body, enhancement, filename_extension)
create_filename_for_fabricated_image('A.jpg', 'reg')

'A_reg.jpg'

In [10]:
def create_ahe_grayscale_image(img):
    """Run CLAHE over ``img`` and return the result as a PIL image.

    ``img`` is handed straight to ``clahe.apply``; within this notebook it is
    the array produced by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.
    """
    # clip limit 2.0 on a 4x4 tile grid
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
    return Image.fromarray(equalizer.apply(img))

In [11]:
def create_inverse_image(img):
    """Return the negative of the PIL image ``img`` (via ``ImageOps.invert``)."""
    negative = ImageOps.invert(img)
    return negative

In [12]:
def create_ahe_image(img):
    """Apply CLAHE to the lightness of a colour image and return a PIL image.

    ``img`` is converted from BGR to the LAB colour model, CLAHE is applied to
    the L channel only (the a and b channels are left untouched), and the
    merged result is converted from LAB to RGB before being wrapped in a PIL
    image.
    """
    # BGR -> LAB, then separate the three channels
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2LAB))

    # equalize only the L channel (clip limit 15.0 on a 10x10 tile grid)
    equalizer = cv2.createCLAHE(clipLimit=15.0, tileGridSize=(10, 10))
    equalized_lightness = equalizer.apply(lightness)

    # put the enhanced L channel back together with the original a and b channels
    merged_lab = cv2.merge((equalized_lightness, chan_a, chan_b))

    # LAB -> RGB
    rgb = cv2.cvtColor(merged_lab, cv2.COLOR_LAB2RGB)
    return Image.fromarray(rgb)

In [13]:
def process_and_save_each_image_in_path(in_dir, sign_name, dir_mapping):
    """Create and save the selected image variants for every file in ``in_dir``.

    Args:
        in_dir: folder holding the source images of one sign.
        sign_name: name of the sign; used as the sub-folder inside each output
            directory.
        dir_mapping: dict mapping an enhancement name (``'original'``,
            ``'ahe'``, ``'ahe_inverted'``, ``'grayscale'``, ``'ahe_grayscale'``,
            ``'ahe_grayscale_inverted'``) to its base output directory.

    Only the variants named in ``PROCESSING_STEPS_TO_USE`` are computed and
    saved. Files that OpenCV cannot decode are reported and skipped. When
    ``TEST_RUN`` is set the loop stops after the first processed file; when
    ``MAX_FILES`` is set it stops after that many processed files.
    """
    steps = PROCESSING_STEPS_TO_USE
    # Work out once which intermediate images are needed at all
    need_ahe = 'ahe' in steps or 'ahe_inverted' in steps
    need_ahe_grayscale = 'ahe_grayscale' in steps or 'ahe_grayscale_inverted' in steps
    need_grayscale = 'grayscale' in steps or need_ahe_grayscale

    files_saved = 0
    for infile in os.listdir(in_dir):
        in_path = os.path.join(in_dir, infile)
        # flag 1 == cv2.IMREAD_COLOR: always decode to a 3-channel BGR array
        original_img = cv2.imread(in_path, 1)
        if original_img is None:
            # cv2.imread returns None instead of raising for unreadable files
            print('skipping {} (not a readable image)'.format(in_path))
            continue

        # (enhancement name, PIL image) pairs, rebuilt for every input file
        imgs_to_save = []

        #-- RGB
        if 'original' in steps:
            # OpenCV arrays are BGR while PIL expects RGB; convert so the saved
            # copy keeps the colours of the source image
            rgb_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
            imgs_to_save.append(('original', Image.fromarray(rgb_img)))

        if need_ahe:
            ahe_img = create_ahe_image(original_img)
            if 'ahe' in steps:
                imgs_to_save.append(('ahe', ahe_img))
            if 'ahe_inverted' in steps:
                imgs_to_save.append(('ahe_inverted', create_inverse_image(ahe_img)))

        #-- Grayscale
        if need_grayscale:
            grayscale_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
            if 'grayscale' in steps:
                imgs_to_save.append(('grayscale', Image.fromarray(grayscale_img)))

            if need_ahe_grayscale:
                ahe_grayscale_img = create_ahe_grayscale_image(grayscale_img)
                if 'ahe_grayscale' in steps:
                    imgs_to_save.append(('ahe_grayscale', ahe_grayscale_img))
                if 'ahe_grayscale_inverted' in steps:
                    imgs_to_save.append(
                        ('ahe_grayscale_inverted', create_inverse_image(ahe_grayscale_img))
                    )

        for enhancement, img in imgs_to_save:
            out_dir = os.path.join(dir_mapping[enhancement], sign_name)
            outfile_name = create_filename_for_fabricated_image(infile, enhancement)
            img.save(os.path.join(out_dir, outfile_name))

        if TEST_RUN:
            break

        files_saved += 1

        if files_saved == MAX_FILES:
            break

In [14]:
def enhance_and_save_images():
    """Run the whole preprocessing pipeline over the train and test folders.

    Optionally empties the output folders (``DELETE_FOLDERS``), recreates
    them, then processes every sign folder of every entry in
    ``DIRECTORY_MAPPINGS``. With ``VERBOSE`` set, the seconds spent on each
    sign are printed as a small table.
    """
    if DELETE_FOLDERS:
        delete_folders()
    create_folders()

    # one mapping for the training images, one for the test images
    for mapping in DIRECTORY_MAPPINGS:
        source_dir = mapping['orig_dirname']
        targets = mapping['new_dirnames']

        print('copying from {}'.format(source_dir))
        for target_dir in targets.values():
            print('-- copying to {}'.format(target_dir))

        if VERBOSE:
            print()
            print('# Seconds | sign')
            print('-----------------------')

        for sign_name in ASL_FOLDERS:
            sign_dir = os.path.join(source_dir, sign_name)

            began = time.time()
            process_and_save_each_image_in_path(sign_dir, sign_name, targets)
            if VERBOSE:
                elapsed = time.time() - began
                print('    {}  | {}'.format(round(elapsed, 2), sign_name))

%time enhance_and_save_images()

successfully created 41 sets of folders
unsuccessfully created 0 sets of folders (probably already existed)
copying from ../../data/fabricated/asl_alphabet/split_asl_alphabet_train
-- copying to ../../data/fabricated/asl_alphabet/processed_asl_train
-- copying to ../../data/fabricated/asl_alphabet/processed_asl_train
-- copying to ../../data/fabricated/asl_alphabet/processed_asl_train
-- copying to ../../data/fabricated/asl_alphabet/processed_asl_train
-- copying to ../../data/fabricated/asl_alphabet/processed_asl_train
-- copying to ../../data/fabricated/asl_alphabet/processed_asl_train

# Seconds | sign
-----------------------
    16.79  | father
    17.01  | R
    18.7  | U
    18.34  | 9
    17.1  | 7
    17.97  | I
    15.57  | N
    16.53  | G
    16.33  | 6
    15.9  | Z
    13.8  | 1
    16.29  | 10
    16.2  | 8
    14.58  | T
    15.98  | S
    14.28  | A
    15.08  | F
    16.17  | O
    16.07  | H
    6.81  | me
    6.0  | my
    17.67  | nothing
    15.03  | mother
    15.

### Zip the folders

In [15]:
from zipfile import ZipFile 
  
def get_all_file_paths(directory):
    """Return the path of every file under ``directory``, including sub-folders.

    Each path is the folder reported by ``os.walk`` joined with the filename.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subfolders, filenames in os.walk(directory)
        for filename in filenames
    ]

In [16]:
def zip_all():
    """Write every file found under the ``FABRICATED_DIRS`` folders into one zip.

    The archive is created at ``ZIP_DIRECTORY`` (opened in ``'w'`` mode). The
    number of files found per folder and the total written are printed.
    """
    # gather the files of every output folder first
    paths_to_zip = []
    for directory in FABRICATED_DIRS:
        found = get_all_file_paths(directory)
        print('found {} files to zip from {}'.format(len(found), directory))
        paths_to_zip.extend(found)

    # then add them to the archive one by one, counting as we go
    zipped_count = 0
    with ZipFile(ZIP_DIRECTORY, 'w') as archive:
        for zipped_count, path in enumerate(paths_to_zip, start=1):
            archive.write(path)

    print('All {} files zipped successfully'.format(zipped_count))

if ZIP_FOLDERS:
    %time zip_all()