In [25]:
from PIL import Image
from matplotlib import pyplot as plt
from scipy.misc import imresize
from imageio import imwrite
from IPython.core.display import display, HTML
import face_recognition_models as frm
import face_recognition as fr
import numpy as np
import threading
import queue
import glob
import cv2
import os

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Inject CSS so the notebook's .container element uses 70% of the page width.
display(HTML("<style>.container { width:70% !important; }</style>"))

In [27]:
def threads(num_threads, data, cb_run, *args, **kwargs):
    """
    Runs cb_run over every item of data using a pool of worker threads.

    Each item is passed to cb_run as its first positional argument, followed by
    *args and **kwargs. Results are collected in the order the calls finish,
    which is not necessarily the order of data. An item whose call raises an
    Exception is reported with print() and left out of the result, so a single
    bad item does not abort the whole batch.

    param num_threads: How many worker threads to start (be careful)
    type num_threads: int
    param data: The items that you want to parse through
    type data: iterable
    param cb_run: the function you want to use to parse each item
    type cb_run: function
    return: the values returned by the cb_run calls that succeeded
    rtype: list
    raises ValueError: if num_threads is less than 1 (no worker could ever
        drain the queue, so the call would otherwise block forever)
    """
    if num_threads < 1:
        raise ValueError('num_threads must be at least 1')

    task_list = []
    Q = queue.Queue()
    # Unique shutdown marker; an object() can never collide with a real item
    # (None, 0, '' ... are all legitimate data values).
    stop = object()

    def _thread_run():
        while True:
            proc = Q.get()
            if proc is stop:
                Q.task_done()
                return
            try:
                task_list.append(cb_run(proc, *args, **kwargs))
            except Exception as exc:
                # Best effort: keep processing the remaining items, but make
                # the failure visible instead of dropping it silently.
                print('threads: skipping item after error:\t', repr(proc), repr(exc))
            finally:
                # Always acknowledge the item, otherwise Q.join() never returns.
                Q.task_done()

    workers = []
    for _ in range(num_threads):
        th = threading.Thread(target=_thread_run)
        th.daemon = True
        th.start()
        workers.append(th)

    try:
        for item in data:
            Q.put(item)
        Q.join()
    finally:
        # One marker per worker so every thread leaves its loop, even when
        # iterating data raised part-way through.
        for _ in workers:
            Q.put(stop)

    # The queue is drained at this point, so the workers exit immediately.
    for th in workers:
        th.join()

    return task_list

In [28]:
def get_list_dir_contents(path):
    """
    Returns the paths of the entries directly inside path.

    The listing comes from glob with a '*' pattern, so each returned path is
    prefixed with path, the order is whatever glob yields, and a missing
    directory simply produces an empty list.
    """
    pattern = os.path.join(path, '*')
    return glob.glob(pattern)

In [29]:
def make_folder(fn):
    """
    Makes sure the folder fn exists and prints which case applied.

    A missing path is created (including intermediate folders); an existing
    path is left untouched.
    """
    if not os.path.exists(fn):
        print('created folder:\t', fn)
        os.makedirs(fn)
        return
    print('found existing directory:\t', fn)

In [30]:
def make_nested_folders(folder_structure=None):
    """
    Creates one level of nested folders from a mapping.

    folder_structure (dict):  {'outer_folder1': ['subfolders', 'of', 'outer_folder1']}
        Creates (relative to the current dir):
            outer_folder1/
                      subfolders/
                      of/
                      outer_folder1/

    Each parent is created before its children; children are joined to the
    parent with a forward slash. Passing None (the default) does nothing.
    """
    if folder_structure is None:
        return
    for parent_folder, children in folder_structure.items():
        make_folder(parent_folder)
        for child_folder in children:
            make_folder('/'.join((parent_folder, child_folder)))

In [31]:
def prepare_env(class_names=['max']):
    """
    Builds the on-disk folder layout for the environment.

    Creates ENVIRONMENT_NAME with its DATA_FOLDER_NAME and MODEL_FOLDER_NAME
    subfolders, then, for every name in class_names, a folder under the data
    folder holding 'imgs', 'processed_imgs' and 'face_encodings'.

    The three *_NAME values are read from module-level globals, so they must
    be defined before this function is called.
    """
    make_nested_folders({ENVIRONMENT_NAME: [DATA_FOLDER_NAME, MODEL_FOLDER_NAME]})

    data_root = ENVIRONMENT_NAME + '/' + DATA_FOLDER_NAME
    per_person_subfolders = ['imgs', 'processed_imgs', 'face_encodings']
    for each_person in class_names:
        make_nested_folders({data_root + '/' + each_person: per_person_subfolders})

In [36]:
def robust_bounding_boxes(img, model_type='hog'):
    """
    Finds a single face bounding box, retrying on downsampled copies of img.

    Operates under the assumption that img likely has only 1 face.

    Detection is run on the full-resolution image and on four downsampled
    copies (smallest first). The first downsampled copy that yields exactly one
    face wins and is returned right away. If none does, the best remaining
    candidate is used: a full-resolution result with exactly one face if there
    is one, otherwise the most recent attempt that found several faces (its
    first box is returned and a warning is printed).

    param img: the image to search
    type img: array as accepted by fr.face_locations and imresize
    param model_type: detection model name forwarded to fr.face_locations
    type model_type: str
    return: (image, box) where image is the (possibly downsampled) image the
        box refers to and box is one entry of fr.face_locations' result;
        (img, None) when no face was found at any resolution
    rtype: tuple
    """
    downsample_pcts = [x**2 for x in np.linspace(.67, .97, 4)]

    # Fallback candidate, always stored as (image, list_of_boxes) so the face
    # count below is taken from the boxes and never from the image itself.
    fallback = (img, fr.face_locations(img, model=model_type))

    for pct in downsample_pcts:
        lower_res_img = imresize(img, pct)
        bounding_boxes = fr.face_locations(lower_res_img, model=model_type)
        if len(bounding_boxes) == 1:
            return lower_res_img, bounding_boxes[0]
        # Keep a multi-face result only while nothing better (a single face at
        # full resolution) is already on hand.
        if len(bounding_boxes) > 1 and len(fallback[1]) != 1:
            fallback = (lower_res_img, bounding_boxes)

    best_img, best_boxes = fallback
    if len(best_boxes) == 0:
        print('No faces found')
        return img, None
    if len(best_boxes) > 1:
        print('UNEXPECTED: Found', len(best_boxes), 'faces.\n----> Only returning 1 bounding box')
    return best_img, best_boxes[0]

In [33]:
def process_persons_imgs(name='max'):
    """
    Runs process_img over every entry in one person's imgs folder.

    The folder is ENVIRONMENT_NAME/DATA_FOLDER_NAME/<name>/imgs (both constants
    are module-level globals). Work is spread over 8 threads via threads(), so
    the returned list holds one process_img result per successfully processed
    entry, in completion order.

    Note: name only selects the folder; it is not forwarded to process_img,
    which therefore runs with its own default.
    """
    imgs_dir = '/'.join([ENVIRONMENT_NAME, DATA_FOLDER_NAME, name, 'imgs'])
    return threads(8, get_list_dir_contents(imgs_dir), process_img)

In [34]:
def process_img(img_path, name='max'):
    """
    Detects the face in one image, saves the processed image and encodes the face.

    The image is loaded, passed through robust_bounding_boxes, and - when a
    face is found - the (possibly downsampled) image is written next to the
    source folder as <parent of the image's folder>/processed_imgs/PR_<file name>.
    The output path is derived from the path components, so it can never be
    the input path itself and other occurrences of 'imgs' in the path are left
    alone. The processed_imgs folder must already exist.

    param img_path: path of the image, expected to live in a person's imgs folder
    type img_path: str
    param name: currently unused; kept so existing callers keep working
    type name: str
    return: dict with keys 't', 'b', 'l', 'r' (box edges), 'outpath' (where the
        processed image was written) and 'encoding' (result of
        fr.face_encodings for that box); every value is None when no face was
        detected
    rtype: dict
    """
    img = fr.load_image_file(img_path)
    resized_img, bounding_box = robust_bounding_boxes(img)

    if bounding_box is None:
        print('no faces detected:\t', img_path)
        return {'t': None, 'b': None, 'l': None, 'r': None, 'outpath': None, 'encoding': None}

    # os.path.split understands the platform's separators (both '/' and '\\'
    # on Windows), so no manual separator sniffing is needed.
    imgs_dir, file_name = os.path.split(img_path)
    person_dir = os.path.dirname(imgs_dir)
    outpath = 'processed_imgs/PR_' + file_name
    if person_dir:
        # rstrip avoids a doubled separator when person_dir is a filesystem root.
        outpath = person_dir.rstrip('/\\') + '/' + outpath

    # insert further preprocessing here (synthetic data augmentation, etc)
    imwrite(outpath, resized_img)
    encoding = fr.face_encodings(resized_img, known_face_locations=[bounding_box], num_jitters=1)
    # face_recognition reports boxes in (top, right, bottom, left) order.
    top, right, bottom, left = bounding_box
    return {'t': top, 'b': bottom, 'l': left, 'r': right, 'outpath': outpath, 'encoding': encoding}

In [35]:
# Folder names read as module-level globals by prepare_env() and
# process_persons_imgs(); they must be set before either function is called.
ENVIRONMENT_NAME = 'default_env'
DATA_FOLDER_NAME = 'data'
MODEL_FOLDER_NAME = 'models'
# Create the folder tree for the default class list (['max']); folders that
# already exist are only reported, not recreated.
prepare_env()

found existing directory:	 default_env
found existing directory:	 default_env/data
found existing directory:	 default_env/models
found existing directory:	 default_env/data/max
found existing directory:	 default_env/data/max/imgs
found existing directory:	 default_env/data/max/processed_imgs
found existing directory:	 default_env/data/max/face_encodings


In [23]:
# Run process_img over every entry of max's imgs folder (8 worker threads) and
# keep the per-image result dicts.
person_data = process_persons_imgs(name='max')

`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  # Remove the CWD from sys.path while we load stuff.


In [37]:
# Inspect one result record. threads() appends results as workers finish, so
# index 0 is whichever image completed first, not necessarily the first file.
person_data[0]

{'t': 325,
 'b': 511,
 'l': 304,
 'r': 489,
 'outpath': 'default_env/data/max/processed_imgs/PR_20170408_123952(1) - Copy - Copy - Copy - Copy.jpg',
 'encoding': [array([-0.08513367,  0.06522654,  0.06782659, -0.01546315, -0.00675822,
          0.06281856, -0.03286202, -0.1078885 ,  0.2059442 , -0.07514574,
          0.09266634,  0.01631441, -0.20897728,  0.03033489,  0.00726494,
          0.04656952, -0.10076991, -0.16695994, -0.05510762, -0.09297561,
         -0.00469096,  0.06527219, -0.06284109,  0.08451527, -0.1395053 ,
         -0.2936753 , -0.07827049, -0.10171979,  0.05588707, -0.04053194,
          0.0245885 ,  0.02029572, -0.21148096, -0.01495444, -0.00061015,
          0.12370961, -0.07108559, -0.12583566,  0.24609461,  0.00434989,
         -0.10185358,  0.00299108,  0.00378706,  0.26730436,  0.0962589 ,
          0.0445571 ,  0.07571812, -0.07063815,  0.22819759, -0.26048413,
          0.10347051,  0.18815698,  0.13794255,  0.0859111 ,  0.06199992,
         -0.21461138,  0.