In [None]:
%matplotlib inline

# Imports
import glob
import os

#Packages
import numpy
import PIL

# Squash deprecation warnings: install an "ignore" filter so that
# DeprecationWarning messages raised by later cells are not displayed
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [None]:
# LFW image dimensions, matching the defaults of the loader functions below.
# NOTE(review): a later cell unpacks lfw_people.images.shape as (n_samples, h, w);
# confirm that 62 is really meant as the width here and not the row count.
lfw_width, lfw_height = 62, 47

## Append a directory of new images to LFW

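*NOTE*: Name your files using the format `NAME_#.jpg`, where `NAME` is
mapped to the target label and `#` is an integer that distinguishes
multiple images of the same subject (for example, `alice_1.jpg` and
`alice_2.jpg`). The label is taken from the text before the first
underscore, so `NAME` itself must not contain underscores.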

In [None]:
# Load image
def convert_to_lfw(image_file_name, width=62, height=47):
    """
    Convert a given PIL-compatible file to a flattened LFW-style
    grayscale image with the requested width and height.

    Parameters
    ----------
    image_file_name : str or path-like
        Image file to open with PIL.
    width, height : int
        Target dimensions. They are passed to PIL as
        ``resize((height, width))`` while PIL reads that tuple as
        (width, height), so with the defaults the resized image is
        47 pixels wide and 62 pixels tall, i.e. 62 rows by 47 columns
        once converted to an array.

    Returns
    -------
    numpy.ndarray
        1D array of ``width * height`` 8-bit grayscale values in
        row-major (C) order.
    """
    # `import PIL` alone does not load the Image submodule, so import it
    # explicitly instead of relying on another package having done so.
    import PIL.Image

    # Load, grayscale, downscale. The context manager closes the source
    # file; convert() and resize() return new in-memory images.
    with PIL.Image.open(image_file_name) as source_image:
        lfw_image = source_image.convert(mode="L")\
            .resize((height, width), PIL.Image.LANCZOS)

    # Return 1D np array
    return numpy.array(lfw_image).ravel(order="C")


def load_lfw_directory(path, width=62, height=47):
    """
    Load all JPG files from given path into LFW
    format np array.

    Parameters
    ----------
    path : str
        Directory to scan. Only files whose name ends in ``.jpg`` or
        ``.jpeg`` (case-insensitive) are loaded.
    width, height : int
        Forwarded unchanged to ``convert_to_lfw`` for every image.

    Returns
    -------
    lfw_array : numpy.ndarray
        2D array with one flattened image per row, in sorted file-name
        order. Has shape ``(0, width * height)`` when no image is found.
    lfw_labels : list of str
        For each row, the part of the file name before the first
        underscore.
    """
    # List the directory the caller asked for, and sort the names so the
    # row order does not depend on the file system.
    image_names = sorted(
        file_name for file_name in os.listdir(path)
        if file_name.lower().endswith((".jpg", ".jpeg"))
    )
    lfw_labels = [file_name.split("_")[0] for file_name in image_names]

    # Nothing to load: still return a 2D array so callers can stack it
    if not image_names:
        return numpy.empty((0, width * height), dtype=numpy.uint8), lfw_labels

    # Convert every image, then stack once instead of growing the array
    # inside the loop.
    lfw_array = numpy.vstack([
        convert_to_lfw(os.path.join(path, file_name), width=width, height=height)
        for file_name in image_names
    ])

    # Return
    return lfw_array, lfw_labels

In [None]:
# Get LFW dataset (fetched by scikit-learn on first use)
import sklearn.datasets
lfw_people = sklearn.datasets.fetch_lfw_people()

# n_samples, images height & width
n_samples, h, w = lfw_people.images.shape

# Get feature matrix and target vector.
# The target vector is the integer class id per sample (lfw_people.target);
# lfw_people.target_names holds the name string for each class id.
lfw_feature_matrix = lfw_people.data
lfw_target_vector = lfw_people.target

In [None]:
# Merge new data with LFW
new_image_matrix, new_image_labels = load_lfw_directory("./")
# NOTE(review): the new rows are grayscale pixel values as produced by PIL;
# confirm the LFW features use the same value range before training on the
# merged matrix.
if new_image_labels:
    project_feature_matrix = numpy.vstack([lfw_feature_matrix,
                                           new_image_matrix])
else:
    # No new images were found: keep the LFW features as they are
    project_feature_matrix = lfw_feature_matrix
print("Project feature shape: {0}".format(project_feature_matrix.shape))

# Update the target name mapping to append our new labels at the end of the list.
# Labels are added in first-seen order (not via a set) so that every label
# gets the same integer id on every run, and a label that already exists in
# LFW is reused instead of being appended a second time.
project_target_names = lfw_people.target_names.tolist()
target_index_by_name = {}
for name_index, target_name in enumerate(project_target_names):
    target_index_by_name.setdefault(target_name, name_index)
for new_label in new_image_labels:
    if new_label not in target_index_by_name:
        target_index_by_name[new_label] = len(project_target_names)
        project_target_names.append(new_label)

# Update values for the integer target data
project_target_vector = lfw_people.target.tolist()
project_target_vector.extend(target_index_by_name[new_label] for new_label in new_image_labels)
print("Project target shape: {0}".format(len(project_target_vector)))