In [1]:
import numpy as np
import cv2
import glob
import json
import dlib
from imutils import face_utils
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn import metrics
from keras_vggface.vggface import VGGFace

Using TensorFlow backend.


In [2]:
def show_image(image, label):
    cv2.imshow(label, image)
    ans = cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    return ans

In [3]:
participants = glob.glob('dataset/*')
print(participants[0])

dataset/0656f5fe35a54d1589e526a702f578b0


In [4]:
# Load the dataset
with open('data_cleaned.json') as json_file:
    data_all = json.load(json_file)
# Extract the keys in sorted order
keys_all = sorted(data_all)
# Convert python list to np array
keys_all = np.asarray(keys_all)

In [5]:
# Number of training examples to use(0-2758)
DATASET_SIZE = len(keys_all)
DEBUG = False

In [6]:
detector = dlib.get_frontal_face_detector()

In [7]:
faces = np.zeros((DATASET_SIZE, 224, 224, 3))
labels = np.zeros(DATASET_SIZE)
uuids = []
failed_keys = []

for i in range(DATASET_SIZE):
    key = keys_all[i]
    im = cv2.imread('dataset/' + key)
    label = key.split('/')[1]
    uuid = key.split('/')[0]
    uuids.append(uuid)

    rects = detector(im, 0)

    for rect in rects:
        # convert dlib's rectangle to a OpenCV-style bounding box
        # [i.e., (x, y, w, h)], then draw the face bounding box
        (x, y, w, h) = face_utils.rect_to_bb(rect)

        # Get rid of small faces
        if(w < 200 and h < 200):
            continue

        face = im[y:y+h, x:x+w]
        
        if(face.shape[0] == 0 or face.shape[1] == 0):
            faces[i, :] = 0
            failed_keys.append(key)
            continue
        # Resize to match VGGFace requirements
        face = cv2.resize(face, (224, 224))
        faces[i, :] = face
        if(label=='positive'):
            labels[i] = 1
        else:
            labels[i] = 0

#         cv2.imshow('image', im)
#         ans = cv2.waitKey(0)
#         cv2.destroyAllWindows()
#         if ans == ord('q'): break

In [17]:
print(faces.shape)
print(labels.shape)

(2728, 224, 224, 3)
(2728,)
04b7b555cd6d4d41bc2ec8ed6ee259e0


In [10]:
print(len(failed_keys))

20


In [15]:
# Array contain UUIDs of all subjects with their multiplicity
# e.g. for one subject with 50 sample images the array will contain
# the subjects' UUID 50 times
uuids = []
for i in range(DATASET_SIZE):
    key = keys_all[i]
    uuid = key.split('/')[0]
    uuids.append(uuid)

In [48]:
print(len(uuids))

2728


In [26]:
# Calculate how many times each UUID appears
i = 0
len_counter = 0
uuid_lengths = []

prev_uuid = keys_all[0].split('/')[0]

while i < DATASET_SIZE:
    uuid = keys_all[i].split('/')[0]
    if uuid == prev_uuid:
        len_counter += 1
    else:
        uuid_lengths.append(len_counter)
        len_counter = 1
    prev_uuid = uuid
    i += 1
uuid_lengths.append(len_counter)

In [28]:
print(uuid_lengths)

[44, 63, 96, 33, 97, 43, 47, 75, 29, 50, 97, 47, 88, 31, 48, 52, 20, 42, 48, 83, 49, 42, 34, 12, 40, 34, 13, 20, 81, 92, 93, 50, 93, 67, 78, 16, 33, 99, 23, 60, 100, 22, 66, 43, 82, 50, 60, 24, 22, 35, 25, 37]


In [50]:
# Extract and save the features for each UUID
start = 0
for i in range(len(uuid_lengths)):
    temp = features[start:start + uuid_lengths[i], :].shape[0]
    np.save('VGG_datasets/features_' + uuids[start], temp)
    start += uuid_lengths[i]

In [11]:
# model = VGGFace(model='resnet50', include_top=False, pooling='avg')

# features = model.predict(faces)

# print(features.shape)

# X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)

# print(X_train.shape)

# svm_classifier = svm.SVC(C=1, kernel='rbf', gamma='scale')
# svm_classifier.fit(X_train, y_train)
# y_pred_svm = svm_classifier.predict(X_test)

# print('Training set accuracy for SVM:', svm_classifier.score(X_train, y_train))
# print('Test set accuracy for SVM: ', metrics.accuracy_score(y_test, y_pred_svm))