In [11]:
import numpy as np
import cv2
import glob
import json
import dlib
from imutils import face_utils
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn import metrics
from keras_vggface.vggface import VGGFace

Using TensorFlow backend.


In [2]:
# Load the dataset
with open('data_cleaned.json') as json_file:
    data_all = json.load(json_file)
# Extract the keys in sorted order
keys_all = sorted(data_all)
# Convert python list to np array
keys_all = np.asarray(keys_all)

In [9]:
participants = glob.glob('dataset/*')
# Define global variables
DATASET_SIZE = len(keys_all)
NUM_OF_PARTICIPANTS = len(participants)
DEBUG = False
detector = dlib.get_frontal_face_detector()
model = VGGFace(model='resnet50', include_top=False, pooling='avg')

In [5]:
print("""Running attention prediction for {} participants and a total of {} images.
Each participant will be excluded from the training set once, our classifiers 
will be trained on the remaining participants and finally we will predict
the result for the one we left out. The total accuracy will be the mean value
of the accuracy of each participant""".format(NUM_OF_PARTICIPANTS, DATASET_SIZE))

Running attention prediction for 52 participants and a total of 2728 images.
Each participant will be excluded from the training set once, our classifiers 
will be trained on the remaining participants and finally we will predict
the result for the one we left out. The total accuracy will be the mean value
of the accuracy of each participant


In [20]:
# Accuracy metrics for the whole dataset. These are compouted
# by leaving every Subject out one time, calculating the accuracy for each
# one and then taking the mean value.
accuracy_rf_total = 0
accuracy_svm_total = 0
precision_rf_total = 0
precision_svm_total = 0
recall_rf_total = 0
recall_svm_total = 0


# Array to keep track of subjects with low score
low_score_subjects_rf = []
low_score_subjects_svm = []

# Loop over each participant
for j in range(1):
    
    # Extract the UUID
    uuid_excluded = participants[j].split('/')[1]

    # Loop over the dataset to remove the examples associated with this participant
    indices_excluded = []
    keys_excluded = []
    for i in range(DATASET_SIZE):
        key = keys_all[i]
        uuid = key.split('/')[0]
        if(uuid == uuid_excluded):
            indices_excluded.append(i)
            keys_excluded.append(key)
    keys = np.delete(keys_all, indices_excluded)
    CURRENT_DATASET_SIZE = keys.shape[0]
    
    faces = np.zeros((CURRENT_DATASET_SIZE, 224, 224, 3))
    labels = np.zeros(CURRENT_DATASET_SIZE)
    
    # Loop over each example to construct the training dataset. 
    for i in range(CURRENT_DATASET_SIZE):
        
        # Retrieve the key for this example
        key = keys[i]
        
        # Read the image and save the size
        im = cv2.imread('dataset/' + key)
        size = im.shape
        label = key.split('/')[1]
        
        rects = detector(im, 0)

        for rect in rects:
            # convert dlib's rectangle to a OpenCV-style bounding box
            # [i.e., (x, y, w, h)], then draw the face bounding box
            (x, y, w, h) = face_utils.rect_to_bb(rect)

            # Get rid of small faces
            if(w < 200 and h < 200):
                continue
    
            # Crop the face
            face = im[y:y+h, x:x+w]
            
            if(face.shape[0] == 0 or face.shape[1] == 0):
                print('Failed to detect face')
                continue

            # Resize to match VGGFace requirements
            face = cv2.resize(face, (224, 224))
            faces[i, :] = face
            
            if(label=='positive'):
                labels[i] = 1
            else:
                labels[i] = 0
    print(faces.shape)
    print(labels.shape)
    
    # Construct the validation dataset consisting of examples of a single participant
    # Evaluation of the classifiers will be done on this dataset
    faces_eval = np.zeros((len(keys_excluded), 224, 224, 3))
    labels_eval = np.zeros(len(keys_excluded))
    
    for i in range(len(keys_excluded)):
        # Retrieve the key for this example
        key = keys[i]
        
        # Read the image and save the size
        im = cv2.imread('dataset/' + key)
        size = im.shape
        label = key.split('/')[1]
        
        rects = detector(im, 0)

        for rect in rects:
            # convert dlib's rectangle to a OpenCV-style bounding box
            # [i.e., (x, y, w, h)], then draw the face bounding box
            (x, y, w, h) = face_utils.rect_to_bb(rect)

            # Get rid of small faces
            if(w < 200 and h < 200):
                continue
    
            # Crop the face
            face = im[y:y+h, x:x+w]
            
            if(face.shape[0] == 0 or face.shape[1] == 0):
                print('Failed to detect face')
                continue

            # Resize to match VGGFace requirements
            face = cv2.resize(face, (224, 224))
            faces_eval[i, :] = face
            
            if(label=='positive'):
                labels_eval[i] = 1
            else:
                labels_eval[i] = 0
    print(faces_eval.shape)
    print(labels_eval.shape)

Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
Failed to detect face
(2695, 224, 224, 3)
(2695,)
(33, 224, 224, 3)
(33,)


In [21]:
features = model.predict(faces)

In [25]:
features_eval = model.predict(faces_eval)

In [22]:
print(features.shape)

(2695, 2048)


In [26]:
X_train, y_train = features, labels
X_eval, y_eval = features_eval, labels_eval

In [24]:
svm_classifier = svm.SVC(C=1, kernel='rbf', gamma='scale')
svm_classifier.fit(X_train, y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [28]:
y_pred_svm = svm_classifier.predict(X_eval)

In [29]:
print('Training set accuracy for SVM:', svm_classifier.score(X_train, y_train))
print('Test set accuracy for SVM: ', metrics.accuracy_score(y_eval, y_pred_svm))

Training set accuracy for SVM: 0.9424860853432282
Test set accuracy for SVM:  0.9696969696969697
