# Leave-One-Subject-Out Evaluation

In [1]:
import numpy as np
import cv2
import dlib
from imutils import face_utils
import glob
import pickle
from random import shuffle
from sklearn.model_selection import train_test_split
from sklearn import metrics
import json
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
import yaml
from scipy.spatial import distance as dist

In [2]:
def get_camera_parameters(size):
    focal_length = size[1]
    center = (size[1]/2, size[0]/2)
    camera_matrix = np.array(
                             [[focal_length, 0, center[0]],
                             [0, focal_length, center[1]],
                             [0, 0, 1]], dtype = "double"
                             )
    dist_coeffs = np.zeros((4,1)) # Assuming no lens distortion
    
    return camera_matrix, dist_coeffs

In [3]:
def get_full_image_points(landmarks):
    image_points = np.zeros((68, 2))

    for i in range(68):
        image_points[i, :] = (landmarks[i]['x'], landmarks[i]['y'])
    
    return image_points

In [4]:
def get_full_model_points(filename='model_points.txt'):
    """Get all 68 3D model points from file"""
    raw_value = []
    with open(filename) as file:
        for line in file:
            raw_value.append(line)
    model_points = np.array(raw_value, dtype=np.float32)
    model_points = np.reshape(model_points, (3, -1)).T

    # Transform the model into a front view.
    model_points[:, 2] *= -1

    return model_points

In [5]:
def visualize_image(im, rotation_vector, translation_vector, image_points, camera_matrix, dist_coeffs,
                    iris_left, iris_right, label):
    for point in image_points:
        cv2.circle(im, (int(point[0]), int(point[1])), 3, (0, 0, 255), -1)
    cv2.circle(im, (int(iris_left[0]), int(iris_left[1])), 3, (0, 0, 255), -1)
    cv2.circle(im, (int(iris_right[0]), int(iris_right[1])), 3, (0, 0, 255), -1)
    
    # Project the 3D point (0.55592, 6.5629, 300.0) onto the image plane.
    # We use this to draw a line sticking out of the nose
    (nose_end_point2D, jacobian) = cv2.projectPoints(
        np.array([(0.55592, 6.5629, 300.0)]), rotation_vector, translation_vector, camera_matrix, dist_coeffs
        )
    # Draw a line connecting the two points. This line must show
    # the direction out of the nose
    p1 = ( int(image_points[33][0]), int(image_points[33][1]) )
    p2 = ( int(nose_end_point2D[0][0][0]), int(nose_end_point2D[0][0][1]) )
    cv2.line(im, p1, p2, (255,0,0), 2)
    
    # Display image
    cv2.imshow(output, im)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [6]:
def eye_aspect_ratio(eye):
    # compute the euclidean distances between the two sets of
    # vertical eye landmarks (x, y)-coordinates
    A = dist.euclidean([eye[1]['x'], eye[1]['y']], [eye[5]['x'], eye[5]['y']])
    B = dist.euclidean([eye[2]['x'], eye[2]['y']], [eye[4]['x'], eye[4]['y']])
 
    # compute the euclidean distance between the horizontal
    # eye landmark (x, y)-coordinates
    C = dist.euclidean([eye[0]['x'], eye[0]['y']], [eye[3]['x'], eye[3]['y']])
 
    # compute the eye aspect ratio
    ear = (A + B) / (2.0 * C)
 
    # return the eye aspect ratio
    return ear

***

In [7]:
participants = glob.glob('dataset/*')

In [8]:
# Load the dataset
with open('data_cleaned.json') as json_file:
    data_all = json.load(json_file)
# Extract the keys in sorted order
keys_all = sorted(data_all)
# Convert python list to np array
keys_all = np.asarray(keys_all)

In [15]:
# Define global variables
DATASET_SIZE = len(keys_all)
NUM_OF_PARTICIPANTS = len(participants)
EAR_THRESHOLD = 0.17
DEBUG = False
MODEL_POINTS = get_full_model_points()

In [10]:
print("""Running attention prediction for {} participants and a total of {} images.
Each participant will be excluded from the training set once, our classifiers 
will be trained on the remaining participants and finally we will predict
the result for the one we left out. The total accuracy will be the mean value
of the accuracy of each participant""".format(NUM_OF_PARTICIPANTS, DATASET_SIZE))

Running attention prediction for 52 participants and a total of 2728 images.
Each participant will be excluded from the training set once, our classifiers 
will be trained on the remaining participants and finally we will predict
the result for the one we left out. The total accuracy will be the mean value
of the accuracy of each participant


In [20]:
# Accuracy metrics for the whole dataset. These are compouted
# by leaving every Subject out one time, calculating the accuracy for each
# one and then taking the mean value.
accuracy_rf_total = 0
accuracy_svm_total = 0
precision_rf_total = 0
precision_svm_total = 0
recall_rf_total = 0
recall_svm_total = 0


# Array to keep track of subjects with low score
low_score_subjects_rf = []
low_score_subjects_svm = []

# Loop over each participant
for j in range(NUM_OF_PARTICIPANTS):
    
    # Extract the UUID
    uuid_excluded = participants[j].split('/')[1]

    # Loop over the dataset to remove the examples associated with this participant
    indices_excluded = []
    keys_excluded = []
    for i in range(DATASET_SIZE):
        key = keys_all[i]
        uuid = key.split('/')[0]
        if(uuid == uuid_excluded):
            indices_excluded.append(i)
            keys_excluded.append(key)
#     keys = np.delete(keys_all, indices_excluded)
#     CURRENT_DATASET_SIZE = keys.shape[0]

#     # Initialize our training dataset
#     X = np.zeros((CURRENT_DATASET_SIZE, 14, 1))
#     y = np.zeros(CURRENT_DATASET_SIZE)

#     # Indices that the SolvePnP failed
#     failed_indices = []
#     # Indices where the subject blinked
#     blinked_indices = []
    
#     # Loop over each example to construct the training dataset. 
#     for i in range(CURRENT_DATASET_SIZE):
        
#         # Retrieve the key for this example
#         key = keys[i]
        
#         # Read the image and save the size
#         im = cv2.imread('dataset/' + key)
#         size = im.shape
            
#         # Get the 68 facial landmarks
#         landmarks = data_all[key]['landmarks']
        
#         # Use the above landmarks to generate image points in the 
#         # form that solvePnP() takes as input
#         image_points = get_full_image_points(landmarks)
        
#         # Get camera parameters to feed into solvePnP()
#         camera_matrix, dist_coeffs = get_camera_parameters(size)

#         # Solve the PnP problem with the parameters specified above
#         # and obtain rotation and translation vectors
#         (success, rotation_vector, translation_vector) = cv2.solvePnP(
#             MODEL_POINTS, image_points, camera_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE
#             )
        
#         # Iris location features
#         iris_right = np.reshape(np.asarray(data_all[key]['iris_right']), (2, 1))
#         iris_left = np.reshape(np.asarray(data_all[key]['iris_left']), (2, 1))

#         # Difference vector features
#         left_vector = np.asarray( (abs(iris_left[0] - landmarks[39]['x']), abs(iris_left[1] - landmarks[39]['y'])) )
#         right_vector = np.asarray( (abs(iris_right[0] - landmarks[42]['x']), abs(iris_right[1] - landmarks[42]['y'])) )
        
#         # Concatenate all the above features to create a training example
#         X[i, :] = np.concatenate((rotation_vector, translation_vector, iris_left, iris_right, 
#                                  left_vector, right_vector), axis=0)

#         # Check if it is positive or negative example and set the groundtruth value accordingly
#         output = key.split('/')[1]
#         if(output == 'positive'):
#             y[i] = 1
#         elif(output == 'negative'):
#             y[i] = 0
    
#         # Blink Detection
#         leftEAR = eye_aspect_ratio(landmarks[36:42])
#         rightEAR = eye_aspect_ratio(landmarks[42:48])
#         ear = (leftEAR + rightEAR) / 2.0
#         if(ear <= EAR_THRESHOLD):
#             blinked_indices.append(i)
            
#         # Remove examples that SolvePnP crashed
#         if(X[i, 0] > 10000 or not success):
#             print(key)
#             failed_indices.append(i)
            
#     # Delete indices that solvePnP failed to solve correctly
#     X = np.delete(X, failed_indices, axis=0)
#     y = np.delete(y, failed_indices, axis=0)
#     # Blink detection: Remove the blinked examples
#     X = np.delete(X, blinked_indices, axis=0)
#     y = np.delete(y, blinked_indices, axis=0)
    
#     # Reshape for compatibility reasons
#     X = X.squeeze()
    
#     # Normalize features to have 0 mean and 1 variance
#     m = X.mean(axis=0)
#     std = X.std(axis=0)
#     X_scaled = (X - m)/std
    
#     ### Train the classifiers
#     X_train, y_train = X_scaled, y

#     rf_classifier = RandomForestClassifier(n_estimators=500, random_state=1)
#     rf_classifier.fit(X_train, y_train)

#     svm_classifier = svm.SVC(C=10, kernel='rbf', gamma='scale', probability=True)
#     svm_classifier.fit(X_train, y_train)

# ### Uncomment to save classifiers in pickle files so there is no need to train every time
#     with open('classifiers/rf/' + uuid_excluded + '.pickle', 'wb') as f:
#         pickle.dump(rf_classifier, f, pickle.HIGHEST_PROTOCOL)
#     with open('classifiers/svm/' + uuid_excluded + '.pickle', 'wb') as f:
#         pickle.dump(svm_classifier, f, pickle.HIGHEST_PROTOCOL)

### Uncomment to retrieve classifiers from the saved pickle files
    with open('classifiers/rf/' + uuid_excluded + '.pickle', 'rb') as f:
        rf_classifier = pickle.load(f)
    with open('classifiers/svm/' + uuid_excluded + '.pickle', 'rb') as f:
        svm_classifier = pickle.load(f)


    # Construct the validation dataset consisting of examples of a single participant
    # Evaluation of the classifiers will be done on this dataset
    X_eval = np.zeros((len(keys_excluded), 14, 1))
    y_eval = np.zeros(len(keys_excluded))
    
    # Indices where the subject blinked
    blinked_indices = []
    
    # Perform the same steps as the consturction of the training dataset
    for i in range(len(keys_excluded)):
        key = keys_excluded[i]

        im = cv2.imread('dataset/' + key)
        size = im.shape

        landmarks = data_all[key]['landmarks']
        
        camera_matrix, dist_coeffs = get_camera_parameters(size)
        
        image_points = get_full_image_points(landmarks)

        (success, rotation_vector, translation_vector) = cv2.solvePnP(
            MODEL_POINTS, image_points, camera_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE
            )

        iris_right = np.reshape(np.asarray(data_all[key]['iris_right']), (2, 1))
        iris_left = np.reshape(np.asarray(data_all[key]['iris_left']), (2, 1))
        left_vector = np.asarray( (abs(iris_left[0] - landmarks[39]['x']), abs(iris_left[1] - landmarks[39]['y'])) )
        right_vector = np.asarray( (abs(iris_right[0] - landmarks[42]['x']), abs(iris_right[1] - landmarks[42]['y'])) )

        X_eval[i, :] = np.concatenate((rotation_vector, translation_vector, iris_left, iris_right, 
                                      left_vector, right_vector), axis=0)
        
        output = key.split('/')[1]
        if(output == 'positive'):
            y_eval[i] = 1
        elif(output == 'negative'):
            y_eval[i] = 0
        
        # Blink Detection
        leftEAR = eye_aspect_ratio(landmarks[36:42])
        rightEAR = eye_aspect_ratio(landmarks[42:48])
        ear = (leftEAR + rightEAR) / 2.0
        if(ear <= EAR_THRESHOLD):
            blinked_indices.append(i)
#             visualize_image(im, rotation_vector, translation_vector, image_points, camera_matrix,
#                            dist_coeffs, iris_left, iris_right, output)

    X_eval = X_eval.squeeze()
    
    # Number of the examples that we predicted a blink
    predicted_blinks = len(blinked_indices)
    # Counter that counts how many of the predicted blinks are true blinks
    true_blinks = 0
    # Loop over each predicted blink
    for k in blinked_indices:
        # If the groundtruth is Negative our prediction was true
        if y_eval[k] == 0:
            true_blinks += 1

    # Remove the blinked examples
    X_eval = np.delete(X_eval, blinked_indices, axis=0)
    y_eval = np.delete(y_eval, blinked_indices, axis=0)
    
    # Feature Normalization
    m_eval = X_eval.mean(axis=0)
    std_eval = X_eval.std(axis=0)
    X_eval = (X_eval - m_eval)/std_eval

    # Predict Random Forest
    y_pred_rf = rf_classifier.predict(X_eval)
    rf_accuracy_subject = metrics.accuracy_score(y_eval, y_pred_rf)
    # For the overall accuracy of each subject we must take into consideration the blink accuracy too
    rf_accuracy_subject = (rf_accuracy_subject*X_eval.shape[0] + true_blinks)/(X_eval.shape[0] + predicted_blinks)

    # Predict SVM
    threshold = 0.3
    y_prob_svm = svm_classifier.predict_proba(X_eval)
    y_pred_svm = (y_prob_svm[:, 1] >= threshold).astype(int)
    svm_accuracy_subject = metrics.accuracy_score(y_eval, y_pred_svm)
    svm_accuracy_subject = (svm_accuracy_subject*X_eval.shape[0] + true_blinks)/(X_eval.shape[0] + predicted_blinks)
    
    # From the confusion matrix of the 2 classifiers calculate precision and recall
    #### Note that the blinked examples will not be added here ----> FIX
    confusion_matrix_rf = metrics.confusion_matrix(y_eval, y_pred_rf)
    confusion_matrix_svm = metrics.confusion_matrix(y_eval, y_pred_svm)
    precision_rf = confusion_matrix_rf[1][1]/(confusion_matrix_rf[1][1] + confusion_matrix_rf[0][1])
    recall_rf = confusion_matrix_rf[1][1]/(confusion_matrix_rf[1][1] + confusion_matrix_rf[1][0])
    precision_svm = confusion_matrix_svm[1][1]/(confusion_matrix_svm[1][1] + confusion_matrix_svm[0][1])
    recall_svm = confusion_matrix_svm[1][1]/(confusion_matrix_svm[1][1] + confusion_matrix_svm[1][0])
    
    print('RF  #{} Accuracy: {} | Precision: {} | Recall: {}'.format(j, round(rf_accuracy_subject,3),
                                                            round(precision_rf,2), round(recall_rf, 2)))
    print('SVM #{} Accuracy: {} | Precision: {} | Recall: {}'.format(j, round(svm_accuracy_subject, 3),
                                                            round(precision_svm, 2), round(recall_svm, 2)))
    
    # Keep track of the participants that performed poorly for debbuging purposes
    if(rf_accuracy_subject <= 0.5):
        low_score_subjects_rf.append(uuid_excluded)
        with open('classifiers/rf/' + uuid_excluded + '.pickle', 'wb') as f:
            pickle.dump(rf_classifier, f, pickle.HIGHEST_PROTOCOL)
    if(svm_accuracy_subject <= 0.5):
        low_score_subjects_svm.append(uuid_excluded)
        with open('classifiers/svm/' + uuid_excluded + '.pickle', 'wb') as f:
            pickle.dump(svm_classifier, f, pickle.HIGHEST_PROTOCOL)
    
    # Calculate the total metrics by mulitiplying each metric with the
    # number of examples of its dataset and in the end divide by
    # the total number of examples
    accuracy_rf_total += rf_accuracy_subject*len(keys_excluded)
    accuracy_svm_total += svm_accuracy_subject*len(keys_excluded)
    precision_rf_total += precision_rf*len(keys_excluded)
    precision_svm_total += precision_svm*len(keys_excluded)
    recall_rf_total += recall_rf*len(keys_excluded)
    recall_svm_total += recall_svm*len(keys_excluded)
    
accuracy_rf_total /= DATASET_SIZE
accuracy_svm_total /= DATASET_SIZE
precision_rf_total /= DATASET_SIZE
precision_svm_total /= DATASET_SIZE
recall_rf_total /= DATASET_SIZE
recall_svm_total /= DATASET_SIZE

RF  #0 Accuracy: 0.697 | Precision: 0.9 | Recall: 0.53
SVM #0 Accuracy: 0.818 | Precision: 0.93 | Recall: 0.76
RF  #1 Accuracy: 0.806 | Precision: 0.94 | Recall: 0.77
SVM #1 Accuracy: 0.871 | Precision: 0.95 | Recall: 0.86
RF  #2 Accuracy: 0.864 | Precision: 0.85 | Recall: 0.92
SVM #2 Accuracy: 0.818 | Precision: 0.83 | Recall: 0.83
RF  #3 Accuracy: 0.692 | Precision: 0.78 | Recall: 0.78
SVM #3 Accuracy: 0.462 | Precision: 0.62 | Recall: 0.56
RF  #4 Accuracy: 0.676 | Precision: 0.86 | Recall: 0.55
SVM #4 Accuracy: 0.703 | Precision: 0.76 | Recall: 0.73
RF  #5 Accuracy: 0.788 | Precision: 0.78 | Recall: 0.8
SVM #5 Accuracy: 0.818 | Precision: 0.79 | Recall: 0.88
RF  #6 Accuracy: 0.81 | Precision: 1.0 | Recall: 0.67
SVM #6 Accuracy: 0.738 | Precision: 0.81 | Recall: 0.71
RF  #7 Accuracy: 0.871 | Precision: 0.94 | Recall: 0.77
SVM #7 Accuracy: 0.892 | Precision: 0.92 | Recall: 0.84
RF  #8 Accuracy: 0.962 | Precision: 1.0 | Recall: 0.93
SVM #8 Accuracy: 0.962 | Precision: 0.96 | Recall: 0.

In [21]:
print('Total Accuracy RF:  {}'.format(accuracy_rf_total))
print('Total Accucary SVM: {}'.format(accuracy_svm_total))

Total Accuracy RF:  0.7532991202346041
Total Accucary SVM: 0.7620967741935484


In [22]:
print('Precision RF: {}'.format(precision_rf_total))
print('Precision SVM: {}'.format(precision_svm_total))

Precision RF: 0.8393611319728038
Precision SVM: 0.8015463476106014


In [23]:
print('Recall RF: {}'.format(recall_rf_total))
print('Recall SVM: {}'.format(recall_svm_total))

Recall RF: 0.7115066259967211
Recall SVM: 0.7866064933277864


In [24]:
print(len(low_score_subjects_rf))
print(len(low_score_subjects_svm))

2
1


In [25]:
file1 = open("low_score_rf.txt", "w") 
for i in low_score_subjects_rf:
    file1.write(i + '\n')
    
file2 = open("low_score_svm.txt", "w")
for i in low_score_subjects_svm:
    file2.write(i + '\n')