In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import cv2
import concurrent
from tqdm import tqdm

# Defining the paths

In [2]:
IMAGES_FOLDER = 'data/images/'
CSV_FOLDER = 'faces/csv/'

margin = 0.1
SUFFIX = f'margin_{int(margin*100)}%'

# Loading the data

In [3]:
table_data = pd.read_csv(CSV_FOLDER + 'image_faces_' + SUFFIX + '.csv')
table_data.head()

Unnamed: 0,filename,faces,left_eye,right_eye,nose,lips_right,lips_left
0,1040483_1928-01-28_1953.jpg,"[33, 25, 74, 90]","[87, 59]","[53, 58]","[68, 75]","[52, 90]","[80, 90]"
1,10233725_1962-02-20_1985.jpg,"[92, 71, 154, 203]","[209, 157]","[136, 143]","[165, 190]","[126, 216]","[188, 228]"
2,113666_1944-08-15_2011.jpg,"[74, 54, 151, 213]","[186, 141]","[116, 143]","[154, 183]","[125, 214]","[183, 212]"
3,11286255_1931-04-28_1968.jpg,"[68, 45, 139, 189]","[172, 124]","[107, 114]","[134, 158]","[102, 182]","[156, 190]"
4,10164280_1969-04-03_2007.jpg,"[51, 38, 135, 177]","[147, 103]","[82, 105]","[113, 142]","[91, 169]","[146, 167]"


# Getting the face features

In [4]:
def get_np_array(data):
    return np.array([int(f) for f in data[1:-1].split(',')])

def get_face_features(row):
    face = get_np_array(row['faces'])
    face_width = face[2]
    face_height = face[3]
    
    left_eye = get_np_array(row['left_eye'])   
    right_eye = get_np_array(row['right_eye']) 
    lips_left = get_np_array(row['lips_left']) 
    lips_right = get_np_array(row['lips_right'])   
    nose = get_np_array(row['nose'])
    
    eyes_width = np.sqrt(np.sum((left_eye - right_eye)**2))
    eyes_width = eyes_width / face_width
    
    lips_width = np.sqrt(np.sum((lips_left - lips_right)**2))
    lips_width = lips_width / face_width
    
    eyes_middle = (left_eye + right_eye) / 2
    lips_middle = (lips_left + lips_right) / 2  
    eyes_lips_distance = np.sqrt(np.sum((eyes_middle - lips_middle)**2))
    eyes_lips_distance = eyes_lips_distance / face_height

    A = left_eye - nose
    B = right_eye - nose
    cos = np.dot(A, B)
    denominator = np.sqrt(np.sum(A**2)) * np.sqrt(np.sum(B**2))
    angle_eyes_nose = np.arccos(cos / denominator)

    C = lips_left - nose
    D = lips_right - nose
    cos = np.dot(C, D)
    denominator = np.sqrt(np.sum(C**2)) * np.sqrt(np.sum(D**2))
    angle_lips_nose = np.arccos(cos / denominator)
    
    return {
        'filename': row['filename'],
        'eyes_width': eyes_width,
        'lips_width': lips_width,
        'eyes_lips_distance': eyes_lips_distance,
        'angle_eyes_nose': angle_eyes_nose,
        'angle_lips_nose': angle_lips_nose
    }
    
face_features = []

with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [executor.submit(get_face_features, row) for _, row in table_data.iterrows()]
    for future in tqdm(concurrent.futures.as_completed(futures), total=table_data.shape[0]):
        face_features.append(future.result())
        
face_features = pd.DataFrame().from_dict(face_features)

100%|██████████| 8642/8642 [00:00<00:00, 400472.60it/s]


In [5]:
face_features.to_csv(CSV_FOLDER + 'additional_features_' + SUFFIX + '.csv', index=False)