# !!THIS IS A WORK IN PROGRESS!!
## Labeled Faces in the Wild (LFW) experiment
The face features are extracted using the face_recognition/dlib library.

In [None]:
import requests
import cv2
import face_recognition
import matplotlib.pyplot as plt
import numpy as np
import math
import tarfile
import os
import pandas as pd

# Optionally download and unpack Labeled Faces in the Wild (170MB).
# Both steps are off by default; flip the flags below to enable them.
face_archive = 'lfw.tgz'
face_dir = 'lfw'
download_faces = False
unpack_faces = False

if download_faces:
    print ('Starting download ...')
    url = "http://vis-www.cs.umass.edu/lfw/" + face_archive
    # The timeout keeps the notebook from hanging forever on a dead connection.
    response = requests.get(url, timeout=60)
    if response.status_code == 200:
        with open(face_archive, 'wb') as f:
            f.write(response.content)
        print ('Done.')
    else:
        # Report the failure instead of claiming success with nothing written.
        print ('Download failed, HTTP status:', response.status_code)

if unpack_faces:
    print ('Unpacking faces...')
    # The context manager closes the archive handle even if extraction fails.
    # NOTE(review): extractall() trusts the member paths stored in the archive;
    # only unpack archives obtained from a trusted source.
    with tarfile.open(face_archive) as tf:
        tf.extractall()
    print ('Done.')

In [None]:
def extract_face_encodings(img_path):
    """Return the face encoding (feature vector) for the image at img_path.

    When the image contains several faces, the encoding of the face with the
    largest bounding-box area is returned.

    Args:
        img_path: Path of the image file to analyse.

    Returns:
        The encoding of the selected face, or None when no face is detected
        (a message is printed in that case).
    """
    # Read the image for face detection
    img_for_recognition = face_recognition.load_image_file(img_path)

    # Detect faces encodings / feature vector
    face_encodings = face_recognition.face_encodings(img_for_recognition)

    faces_count = len(face_encodings)

    if faces_count == 0:
        print ('No face detected:', img_path)
        return None

    if faces_count == 1:
        # Single face - nothing to choose from
        return face_encodings[0]

    # Several faces: locate them and keep the one with the largest area.
    # NOTE(review): this assumes face_locations() reports the faces in the
    # same order as face_encodings() did for the same image - confirm.
    faces = face_recognition.face_locations(img_for_recognition)

    selected_face_index = 0
    max_face_area = 0

    # Only consider indices that exist in face_encodings.
    for i, (top, right, bottom, left) in enumerate(faces[:faces_count]):
        # Image rows grow downwards, so the height is bottom - top; the
        # reversed subtraction gives a non-positive area and the comparison
        # below would then never select anything but index 0.
        area = (bottom - top) * (right - left)

        if area > max_face_area:
            selected_face_index = i
            max_face_area = area

    return face_encodings[selected_face_index]

def draw_faces(img_path):
    """Load the image at img_path and outline every detected face.

    Returns the loaded image array with a 1-pixel rectangle drawn around
    each face found by face_recognition.face_locations.
    """
    image = face_recognition.load_image_file(img_path)
    outline_color = (0, 255, 255)

    # Each location unpacks as (top, right, bottom, left); cv2.rectangle wants
    # the two opposite corners as (x, y) points and draws on the image in place.
    for top, right, bottom, left in face_recognition.face_locations(image):
        cv2.rectangle(image, (left, top), (right, bottom), outline_color, 1)

    return image

# Demo on an image with several faces (e.g. 3) - in this case
# extract_face_encodings returns the face with the largest area
sample_img_path = 'lfw/George_Robertson/George_Robertson_0003.jpg'
img = draw_faces(sample_img_path)

# Draw original figure with rectangles
plt.figure(figsize=(5, 5))
plt.axis("off")
# face_recognition.load_image_file yields an RGB array by default (per its
# documentation), which is what imshow expects - no colour conversion needed.
plt.imshow(img)
plt.show()

face_encodings = extract_face_encodings(sample_img_path)
if face_encodings is None:
    # extract_face_encodings returns None when no face is detected
    print ('No face encodings available for:', sample_img_path)
else:
    # vector of 128 numbers serves as an input to NN
    print ('face_encodings array length:', len(face_encodings))

In [None]:
# Process data: build one {'name', 'faces'} record per person directory.
# - sorted: os.listdir returns entries in arbitrary order, and the position of
#   a person in this list is used as that person's class index, so a stable
#   order keeps the class mapping identical between runs.
# - directories only: a stray file inside face_dir would make the per-person
#   os.listdir call below fail.
person_list = sorted(
    entry for entry in os.listdir(path = face_dir)
    if os.path.isdir(os.path.join(face_dir, entry))
)

all_person_extracted_faces = []
image_count = 0   # number of images that produced a face encoding
errors_count = 0  # number of people without a single usable image

print ('Processing', len(person_list), 'people ...')

for person in person_list:
    person_face_encodings = []
    # The record is appended even if no face is found, so list positions
    # stay aligned with person_list.
    all_person_extracted_faces.append({'name' : person, 'faces' : person_face_encodings})
    person_dir = os.path.join(face_dir, person)

    for person_img in sorted(os.listdir(path = person_dir)):
        img_full_path = os.path.join(person_dir, person_img)
        extracted_face_encodings = extract_face_encodings(img_full_path)
        if extracted_face_encodings is not None:
            person_face_encodings.append(extracted_face_encodings)
            image_count += 1

    if not person_face_encodings:
        errors_count += 1

print ('People found: ', len(person_list), ', images found:', image_count, ', errors:', errors_count)

In [None]:
# dlib extracts 128 features
input_dim = 128

# one output class per entry of person_list, one sample per extracted face encoding
num_classes = len(person_list)
num_samples = image_count

print('Input dimension: ', input_dim)
print('num_classes:', num_classes)
print('num_samples:', num_samples)

In [None]:
# prepare categories: one-hot matrix with one row per image and one column per person
y_train = np.zeros(shape=(num_samples, num_classes), dtype=np.float32)

row_pos = 0 # image

# col_pos is the category = position of the person in all_person_extracted_faces
for col_pos, person_with_faces in enumerate(all_person_extracted_faces):
    faces = person_with_faces['faces']
    faces_count = len(faces)

    # mark one row per face of this person; people without faces add no rows
    for _ in range(faces_count):
        y_train[row_pos, col_pos] = 1.0
        row_pos += 1

In [None]:
# prepare train input data: one row per face encoding, people visited in the
# order of all_person_extracted_faces
x_train = []

for person_with_faces in all_person_extracted_faces:
    # Use this person's own encodings. Extending with an empty list is a
    # no-op, so people without faces need no special case (and no stale
    # counter from an earlier cell can cause everyone to be skipped).
    x_train.extend(person_with_faces['faces'])

x_train = np.array(x_train)

print('X_train.shape:', x_train.shape)
print('Y_train.shape:', y_train.shape)

In [None]:
from sklearn.model_selection import train_test_split

# hold out 10% of the samples as the test set; the fixed random_state makes
# the shuffle repeatable for identical input
X_train, X_test, Y_train, Y_test = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Keras (from TensorFlow) imports for building of neural network
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

# define a fully connected classifier on top of the face encodings:
# input_dim features -> Dense(512) -> Dense(512) -> num_classes (softmax)
model = Sequential()

model.add(Dense(512, input_dim=input_dim))
model.add(Activation('relu'))
model.add(Dropout(0.2))

model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))

# one output unit per class, normalised to probabilities by softmax
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# `learning_rate` is the current name of the optimizer argument; the old `lr`
# alias is deprecated in tf.keras.
model.compile(loss=categorical_crossentropy,
              optimizer=Adam(learning_rate=1e-4),
              metrics=['accuracy'])

model.summary()

In [None]:
#model.load_weights('lfw1.h5')

In [None]:
# train the network - 100 epochs were tried as well
epochs = 2
batch_size = 64

# the held-out X_test/Y_test split is passed as validation data;
# the returned History object is kept for the plots
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_test, Y_test),
)

In [None]:
# persist the trained weights to 'lfw1.h5' - the same file name the
# commented-out model.load_weights call refers to
model.save_weights('lfw1.h5')

In [None]:
# Plot the per-epoch training/validation loss and accuracy recorded by model.fit
history_dict = history.history
print (history_dict.keys())

loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
# Deliberately not named `epochs`: that name holds the epoch count passed to
# model.fit, and overwriting it with a range breaks re-running the training cell.
epoch_range = range(1, len(loss_values) + 1)

# The colour is given once via `color=`; also passing a 'b' format string
# defines it twice and triggers a matplotlib warning.
plt.plot(epoch_range, loss_values, label='Training loss', color='blue')
plt.plot(epoch_range, val_loss_values, label='Validation loss', color='yellow')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

accuracy = history_dict['accuracy']
val_accuracy = history_dict['val_accuracy']

plt.plot(epoch_range, accuracy, label='Training accuracy', color='red')
plt.plot(epoch_range, val_accuracy, label='Validation accuracy', color='green')
# The model is compiled with categorical cross-entropy over num_classes
# outputs, so this is not a binary accuracy.
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# using one of the training images (images from other sources are currently not demonstrative)
extracted_face = extract_face_encodings('lfw/Pamela_Anderson/Pamela_Anderson_0004.jpg')
if extracted_face is None:
    # extract_face_encodings returns None when no face is detected; fail with
    # a clear message instead of feeding None to the network
    raise ValueError('No face detected in the image to classify')

# add the batch axis: the model expects a 2-D array with one row per sample
X_single = np.expand_dims(np.array(extracted_face), axis=0)

# Sequential.predict_classes was removed from tf.keras; taking the argmax of
# the softmax output yields the same class indices.
probabilities = model.predict(X_single, batch_size=1, verbose=1)
classes = np.argmax(probabilities, axis=-1)

In [None]:
# the predicted class index is the person's position in all_person_extracted_faces
predicted_person = all_person_extracted_faces[classes[0]]
predicted_person_name, predicted_person_faces = (
    predicted_person['name'],
    predicted_person['faces'],
)

print('It could be:', predicted_person_name)