In [73]:
from PIL import Image
from matplotlib import pyplot as plt
from scipy.misc import imresize
from imageio import imwrite
from sklearn.neighbors import KNeighborsClassifier as KNN
from IPython.core.display import display, HTML
import face_recognition_models as frm
import face_recognition as fr
import pandas as pd
import numpy as np
import threading
import queue
import glob
import cv2
import os

%matplotlib inline
display(HTML("<style>.container { width:70% !important; }</style>"))

In [12]:
def threads(num_threads, data, cb_run, *args, **kwargs):
    """
    Threads any function with any data with as many threads as you want (be careful)

    param num_threads: How many threads you want to Run
    type num_threads: int
    param data: The data that you want to parse through
    type data: list
    param cb_run: the function you want to use to parse the data
    type cb_run: function
    """
    task_list = []
    Q = queue.Queue()
    def _thread_run():
        while True:
            proc = Q.get()
            try:
                task_list.append(cb_run(proc, *args, **kwargs))
            except Exception:
                pass
            Q.task_done()

    for i in range(num_threads):
        th = threading.Thread(target=_thread_run)
        th.daemon = True
        th.start()

    for item in data:
        Q.put(item)
    Q.join()
    
    return task_list

In [13]:
def get_list_dir_contents(path):
    dir_contents = glob.glob(os.path.join(path, '*'))
    return dir_contents

In [14]:
def make_folder(fn):
    """
    Creates a folder fn if it doesn't exist
    """
    if os.path.exists(fn):
        print('found existing directory:\t', fn)
    else:
        print('created folder:\t', fn)
        os.makedirs(fn)

In [15]:
def make_nested_folders(folder_structure=None):
    """
    folder_structure (dict):  {'outer_folder1': ['subfolders', 'of', 'outer_folder1']}
        Creates (in current dir):
            outer_folder1/
                      subfolders/
                      of/
                      outer_folder1/
    """
    if folder_structure is not None:
        for parent_folder, children in folder_structure.items():
            make_folder(parent_folder)
            for child_folder in children:
                make_folder(parent_folder + '/' + child_folder)

In [21]:
def prepare_env(class_names=['max', 'caleb', 'drew', 'arshia']):
    make_nested_folders({
        ENVIRONMENT_NAME: [DATA_FOLDER_NAME, MODEL_FOLDER_NAME]
    })
    for each_person in class_names:
        person_folder = ENVIRONMENT_NAME + '/' + DATA_FOLDER_NAME + '/' + each_person
        make_nested_folders({person_folder: ['imgs', 'processed_imgs', 'face_encodings']})

In [29]:
def robust_bounding_boxes(img, model_type='hog'):
    """
    Operates under the assumption that img likely has only 1 face
    """
    attempts = 0
    bounding_boxes = fr.face_locations(img)
    downsample_pcts = [x**2 for x in np.linspace(.67, .97, 4)]
    temp_best = bounding_boxes, img
    while attempts < 4:
        lower_res_img = imresize(img, downsample_pcts[attempts])
        bounding_boxes = fr.face_locations(lower_res_img, model=model_type)
        if len(bounding_boxes) == 1:
            return lower_res_img, bounding_boxes[0]
        elif len(bounding_boxes) > 1:
            temp_best = lower_res_img, bounding_boxes[0]
        attempts += 1
    if len(temp_best[0]) > 1:
        print('UNEXPECTED: Found', len(temp_best[0]), 'faces.\n----> Only returning 1 bounding box')
        return temp_best
    else:
        print('No faces found')
        return img, None

In [40]:
def process_persons_imgs(name='max'): # env
    all_persons_img_paths = get_list_dir_contents(ENVIRONMENT_NAME + '/' + DATA_FOLDER_NAME + '/' + name + '/imgs') # fix
    data = threads(8, all_persons_img_paths, process_img, name=name)
    return data

In [41]:
def process_img(img_path, name='max'):
    img = fr.load_image_file(img_path)
    resized_img, bounding_box = robust_bounding_boxes(img)
    if '\\' in img_path: # fix
        outpath = 'processed_imgs/PR_'.join(img_path.split('imgs\\'))
    else:
        outpath = 'processed_imgs/PR_'.join(img_path.split('imgs/'))
    # insert further preprocessing here (synthetic data augmentation, etc)
    if bounding_box != None:
        imwrite(outpath, resized_img)
        encoding = fr.face_encodings(resized_img, known_face_locations=[bounding_box], num_jitters=1)[0]
        return {'t': bounding_box[0], 'b': bounding_box[2], 'l': bounding_box[3], 'r': bounding_box[1], 'outpath': outpath, 'encoding': encoding, 'name': name}
    print('no faces detected:\t', img_path)
    return {'t': None, 'b': None, 'l': None, 'r': None, 'outpath': None, 'encoding': None, 'name': name}

In [42]:
ENVIRONMENT_NAME = 'default_env'
DATA_FOLDER_NAME = 'data'
MODEL_FOLDER_NAME = 'models'
CLASS_NAMES = ['max', 'caleb', 'arshia', 'drew']
prepare_env(CLASS_NAMES)

found existing directory:	 default_env
found existing directory:	 default_env/data
found existing directory:	 default_env/models
found existing directory:	 default_env/data/max
found existing directory:	 default_env/data/max/imgs
found existing directory:	 default_env/data/max/processed_imgs
found existing directory:	 default_env/data/max/face_encodings
found existing directory:	 default_env/data/caleb
found existing directory:	 default_env/data/caleb/imgs
found existing directory:	 default_env/data/caleb/processed_imgs
found existing directory:	 default_env/data/caleb/face_encodings
found existing directory:	 default_env/data/arshia
found existing directory:	 default_env/data/arshia/imgs
found existing directory:	 default_env/data/arshia/processed_imgs
found existing directory:	 default_env/data/arshia/face_encodings
found existing directory:	 default_env/data/drew
found existing directory:	 default_env/data/drew/imgs
found existing directory:	 default_env/data/drew/processed_imgs
fou

In [74]:
all_data = []
for name in CLASS_NAMES:
    person_data = process_persons_imgs(name=name)
    person_df = pd.DataFrame(person_data)
    person_df.to_csv(ENVIRONMENT_NAME + '/' + DATA_FOLDER_NAME + '/' + name + '/face_encodings/encodings.csv', index=False)
    print('Done with', name)
    all_data.extend(person_data)
all_df = pd.DataFrame(all_data)

`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  # Remove the CWD from sys.path while we load stuff.


Done with max
Done with caleb
Done with arshia
Done with drew


In [75]:
knn = KNN(n_neighbors=2)
knn.fit(all_df['encoding'].values.tolist(), all_df['name'].values)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=2, p=2,
           weights='uniform')

In [76]:
img = fr.load_image_file(ENVIRONMENT_NAME + '/data/test/max.jpg')
resized_img, bounding_box = robust_bounding_boxes(img)
encoding = fr.face_encodings(resized_img, known_face_locations=[bounding_box], num_jitters=1)[0]
knn.predict([encoding])

`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  # Remove the CWD from sys.path while we load stuff.


array(['max'], dtype=object)

In [77]:
img = fr.load_image_file(ENVIRONMENT_NAME + '/data/test/arsh.jpg')
resized_img, bounding_box = robust_bounding_boxes(img)
encoding = fr.face_encodings(resized_img, known_face_locations=[bounding_box], num_jitters=1)[0]
knn.predict([encoding])

`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  # Remove the CWD from sys.path while we load stuff.


array(['arshia'], dtype=object)

In [78]:
img = fr.load_image_file(ENVIRONMENT_NAME + '/data/test/caleb.jpg')
resized_img, bounding_box = robust_bounding_boxes(img)
encoding = fr.face_encodings(resized_img, known_face_locations=[bounding_box], num_jitters=1)[0]
knn.predict([encoding])

`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  # Remove the CWD from sys.path while we load stuff.


array(['caleb'], dtype=object)

In [79]:
img = fr.load_image_file(ENVIRONMENT_NAME + '/data/test/drew.jpg')
resized_img, bounding_box = robust_bounding_boxes(img)
encoding = fr.face_encodings(resized_img, known_face_locations=[bounding_box], num_jitters=1)[0]
knn.predict([encoding])

`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  # Remove the CWD from sys.path while we load stuff.


array(['drew'], dtype=object)