# Extraction of facial features


## Initialization and installation of packages

In [None]:
# Mount Google Drive so the notebook can read and write files under '/content/drive'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Import packages.
import numpy as np
import pandas as pd
import os
import errno
import dlib
import cv2
import imageio
import skimage

from skimage.feature import hog
from imageio import imsave

In [None]:
# Check Python and packages' version.
!python3 --version
print("numpy: "+np.__version__)
print("pandas: "+pd.__version__)
print("dlib: "+dlib.__version__)
print("opencv: "+cv2.__version__)

Python 3.9.16
numpy: 1.22.4
pandas: 1.4.4
dlib: 19.24.1
opencv: 4.7.0


In [None]:
# Initialization of variables: choose the values used by the rest of the notebook.
image_height = 48 # Height, in pixels, each sample is reshaped to.
image_width = 48 # Width, in pixels, each sample is reshaped to.
window_size = 24 # Side length, in pixels, of the window sliced by 'sliding_hog_windows'.
window_step = 6 # Stride, in pixels, between consecutive sliding windows.
ONE_HOT_ENCODING = False # If True, labels are stored as one-hot lists instead of integer indices.
SAVE_IMAGES = False # If True, every converted image is also written to disk as a .jpg file.
GET_LANDMARKS = False # If True, facial landmarks are extracted with the Dlib predictor.
GET_HOG_FEATURES = True # If True, HOG features are computed on the whole image.
GET_HOG_WINDOWS_FEATURES = False # If True, HOG features are computed per sliding window (checked before 'GET_HOG_FEATURES').
SELECTED_LABELS = [0,1,2,3,4,5,6] # Original emotion labels to keep; images with any other label are skipped.
IMAGES_PER_LABEL = 40000 # Maximum number of images kept per label; set high so that the entire dataset is used.
OUTPUT_FOLDER_NAME = "/content/drive/MyDrive/TFG_FER/Extracted_Features/fer2013plus_hog" # Folder where the extracted features are saved.

## Load Dlib predictor and prepare output folder

In [None]:
# Extract the Dlib landmark model from 'Dlib.zip' (looked up in the current working directory).
# NOTE(review): the predictor is loaded below from the Drive path, not from this extracted copy - confirm this cell is still needed.
!unzip "Dlib.zip" # If not yet done.

Archive:  Dlib.zip
  inflating: shape_predictor_68_face_landmarks.dat  


In [None]:
# Load Dlib predictor and prepare arrays.
predictor = dlib.shape_predictor('/content/drive/MyDrive/TFG_FER/Datasets_and_packages/shape_predictor_68_face_landmarks.dat') # Predictor of facial landmarks from the dlib library.
original_labels = [0, 1, 2, 3, 4, 5, 6]
# 'new_labels' keeps only the labels present in both 'original_labels' and 'SELECTED_LABELS'.
# It is sorted explicitly because the position of a label in this list is used as its new label,
# and the iteration order of a set is not guaranteed.
new_labels = sorted(set(original_labels) & set(SELECTED_LABELS))
# One counter per new label, used to track how many images of each label have been kept.
# Plain Python ints are used on purpose: a uint8 counter cannot represent values above 255 and,
# under NumPy 2 promotion rules, 'uint8 + 1' stays uint8 and wraps around, so the
# 'IMAGES_PER_LABEL' limit could never be reached.
nb_images_per_label = [0] * len(new_labels)

In [None]:
# Create a new directory to store facial features.
%cd /content/drive/MyDrive/TFG_FER/Extracted_Features/
try:
    os.makedirs(OUTPUT_FOLDER_NAME) # Create new directory with the name specified in 'OUTPUT_FOLDER_NAME'.
except OSError as e: # Exception if an error 'e' occurs.
    if e.errno == errno.EEXIST and os.path.isdir(OUTPUT_FOLDER_NAME): # If the directory altready exists ('EEXIST') and it is, in fact, a directory, the action is passed.
        pass
    else:
        raise # If there is any other kind of error, rises an error.

/content/drive/MyDrive/TFG_FER/Extracted_Features


## Definition of functions

In [None]:
# Definition of function to get facial landmarks.
def get_landmarks(image, rects): # 'rects' is a list of rectangles (windows of detected faces).
    """Return the facial landmarks of the single face located by 'rects'.

    Args:
        image: Image passed as-is to the module-level Dlib 'predictor'.
        rects: Sequence that must contain exactly one face rectangle.

    Returns:
        A numpy matrix with one row per landmark point and two columns
        holding the x and y coordinates.

    Raises:
        ValueError: With message "TooManyFaces" if more than one rectangle is
            given, or "NoFaces" if none is given.
    """
    # ValueError (an Exception subclass) is raised instead of a bare BaseException so that
    # callers using 'except Exception' can report the failing image and keep going.
    if len(rects) > 1: # Only one face can be processed at a time.
        raise ValueError("TooManyFaces")
    if len(rects) == 0: # There is no face to extract landmarks from.
        raise ValueError("NoFaces")
    shape = predictor(image, rects[0]) # Landmarks predicted inside the only rectangle.
    return np.matrix([[p.x, p.y] for p in shape.parts()])

Explanation of **'np.matrix([[p.x, p.y] for p in predictor(image, rects[0]).parts()])'**:

The 'predictor' function is called on the input image and the first rectangle in the 'rects' list, which is assumed to contain the face of interest. The 'parts()' method is called on the result of the 'predictor' function, which returns a set of landmarks detected by the facial landmark detector.

The resulting set of landmarks is then used to create a two-dimensional NumPy array using a list comprehension. For each landmark point 'p' in the set, a list of its x and y coordinates '([p.x, p.y])' is created. These lists are then grouped together into a larger list that contains all the landmarks in the set.

The resulting list of landmark coordinates is then converted into a NumPy matrix using the 'np.matrix()' function. The resulting matrix has dimensions '(n,2)', where n is the number of landmarks detected in the input image. The first column of the matrix contains the x-coordinates of the landmarks, and the second column contains the y-coordinates.


In [None]:
# Definition of function to convert a label from its original form to a new label that is compatible with a particular ML model.
def get_new_label(label, one_hot_encoding=False):
    """Map an original label to its position in the module-level 'new_labels' list.

    Args:
        label: Original label; it must be an element of 'new_labels',
            otherwise 'list.index' raises ValueError.
        one_hot_encoding: If True, return a one-hot list instead of the index.

    Returns:
        The index of 'label' in 'new_labels', or a list as long as
        'new_labels' filled with zeros except for a 1 at that index.
    """
    position = new_labels.index(label) # Position of the original label in 'new_labels'.
    if not one_hot_encoding:
        return position
    encoded = list(np.zeros(len(new_labels), 'uint8')) # List of zeros with the same length as 'new_labels'.
    encoded[position] = 1 # Mark the position of the new label.
    return encoded

In [None]:
# Definition of function to apply the HOG algorithm on a sliding window basis.
def sliding_hog_windows(image):
    """Concatenate the HOG descriptors of every sliding window of 'image'.

    Windows start every 'window_step' pixels along both axes, covering
    'image_height' x 'image_width', and span up to 'window_size' pixels.

    Returns:
        A flat list with the HOG values of all windows, in row-major window order.
    """
    # NOTE(review): window origins run up to the image border, so (assuming 'image' is a
    # numpy array of shape (image_height, image_width)) the last windows are truncated by
    # slicing and are smaller than 'window_size' - confirm this is intended.
    descriptors = []
    row_starts = range(0, image_height, window_step) # Window origins along the y axis.
    col_starts = range(0, image_width, window_step) # Window origins along the x axis.
    for top in row_starts:
        for left in col_starts:
            patch = image[top:top + window_size, left:left + window_size]
            patch_descriptor = hog(patch, orientations=8, pixels_per_cell=(8, 8), cells_per_block=(1, 1), visualize=False)
            descriptors.extend(patch_descriptor) # '.extend()' adds the HOG values one by one instead of nesting the array.
    return descriptors

## Import dataset

In [None]:
# Move to the project folder and unzip 'fer2013.csv.zip' there, if not done already.
# NOTE(review): the next cell reads 'fer2013plus.csv' from 'Datasets_and_packages', not the file extracted here - confirm this step is still required.
%cd /content/drive/MyDrive/TFG_FER/
!unzip '/content/drive/MyDrive/TFG_FER/Datasets_and_packages/fer2013.csv.zip'

In [None]:
# Load 'fer2013plus.csv' into a DataFrame; the cells below read its 'Usage', 'pixels' and 'emotion' columns.
# NOTE(review): presumably this is FER2013 with corrected (FER+) labels - confirm that the 'emotion' values match 'SELECTED_LABELS'.
data = pd.read_csv('/content/drive/MyDrive/TFG_FER/Datasets_and_packages/fer2013plus.csv')

## Converting FER2013 to images and landmarks

In [None]:
# Convert every subset of the dataset into images, labels and the selected features, and save
# them as .npy files under 'OUTPUT_FOLDER_NAME/<category>/'.
for category in data['Usage'].unique(): # For each unique value of the 'Usage' column of the CSV...

    print("Converting set: " + category + "...") # To track where the conversion process is currently at.

    # Create the output folder of this category. The full output path is used (not a path
    # relative to the current working directory), and an already existing folder is accepted.
    category_dir = os.path.join(OUTPUT_FOLDER_NAME, category)
    os.makedirs(category_dir, exist_ok=True)

    # Get samples and labels of each category.
    category_data = data[data['Usage'] == category] # Boolean mask: keeps only the rows whose 'Usage' matches the category.
    samples = category_data['pixels'].values # Pixel strings of each image in the category, as a NumPy array.
    labels = category_data['emotion'].values # Emotion label of each image in the category, as a NumPy array.

    # Get images and extract features.
    images = []
    labels_list = []
    landmarks = []
    hog_features = []
    hog_images = []
    for i, (sample, label) in enumerate(zip(samples, labels)):
        try:
            if label not in SELECTED_LABELS: # Labels that were not selected are skipped.
                continue
            label_index = get_new_label(label)
            if nb_images_per_label[label_index] >= IMAGES_PER_LABEL: # Enough images of this label have been kept already.
                continue

            # 'sample' is a string with the pixel values of a flattened image separated by spaces:
            # it is parsed into a 1D array of integers and reshaped to (image_height, image_width).
            image = np.fromstring(sample, dtype=int, sep=" ").reshape((image_height, image_width))

            if SAVE_IMAGES or GET_LANDMARKS:
                image_uint8 = image.astype(np.uint8)
            if SAVE_IMAGES:
                # Saved inside the category folder created above, next to the .npy outputs.
                imsave(os.path.join(category_dir, str(i) + '.jpg'), image_uint8)

            features = None
            hog_image = None
            if GET_HOG_WINDOWS_FEATURES:
                features = sliding_hog_windows(image)
                # Only the visualisation is taken from the whole-image HOG in this mode.
                _, hog_image = hog(image, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1), visualize=True)
            elif GET_HOG_FEATURES:
                features, hog_image = hog(image, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1), visualize=True)

            face_landmarks = None
            if GET_LANDMARKS:
                # The image is written to a temporary .jpg file and read back with OpenCV
                # before being given to the landmark predictor.
                imsave('temp.jpg', image_uint8)
                image2 = cv2.imread('temp.jpg')
                # No face detection is run: a fixed rectangle one pixel inside the image
                # border is used as the face window.
                face_rects = [dlib.rectangle(left=1, top=1, right=image_width - 1, bottom=image_height - 1)]
                face_landmarks = get_landmarks(image2, face_rects)

            new_label = get_new_label(label, one_hot_encoding=ONE_HOT_ENCODING) # Integer label or its one-hot representation.

        # Errors: report the failing image and go on with the next one.
        except Exception as e:
            print("error in image: " + str(i) + " - " + str(e))
        else:
            # Results are stored only once every step has succeeded, so that all the saved
            # arrays keep the same length and stay aligned with 'labels_list'.
            images.append(image)
            if GET_HOG_WINDOWS_FEATURES or GET_HOG_FEATURES:
                hog_features.append(features)
                hog_images.append(hog_image)
            if GET_LANDMARKS:
                landmarks.append(face_landmarks)
            labels_list.append(new_label)
            nb_images_per_label[label_index] += 1 # Update the number of images kept for this label.

    np.save(os.path.join(category_dir, 'images.npy'), images)
    np.save(os.path.join(category_dir, 'labels.npy'), labels_list) # Same file for integer and one-hot labels.
    if GET_LANDMARKS:
        np.save(os.path.join(category_dir, 'landmarks.npy'), landmarks)
    if GET_HOG_FEATURES or GET_HOG_WINDOWS_FEATURES:
        np.save(os.path.join(category_dir, 'hog_features.npy'), hog_features)
        np.save(os.path.join(category_dir, 'hog_images.npy'), hog_images)


Converting set: Training...
Converting set: PublicTest...
Converting set: PrivateTest...


## Bibliography
https://github.com/amineHorseman/facial-expression-recognition-svm/blob/master/convert_fer2013_to_images_and_landmarks.py