In [2]:
import face_recognition as fr
import numpy as np
from numpy.random import default_rng
import os
import tqdm
from scipy.spatial.distance import pdist, squareform
import shutil

# Embeddings
Get embeddings for all faces in the dataset.

In [62]:
# Directory holding the original face images.
dir_face_origin = os.path.join("origin", "face")
# os.listdir returns entries in arbitrary order, so sort the names: every later
# step addresses faces by their position in files_face, and that position has to
# be the same on every run for cached per-file results to stay aligned.
# NOTE(review): if a cached embedding matrix was built from an unsorted listing,
# regenerate it after this change.
files_face = np.array(sorted(file for file in os.listdir(dir_face_origin) if file.endswith(".tif")))

In [12]:
# Compute one face embedding per image, keeping row i aligned with files_face[i].
ebds_list = []
for file_face in tqdm.tqdm(files_face):
    ebds = fr.face_encodings(fr.load_image_file(os.path.join(dir_face_origin, file_face)))
    # face_encodings returns one encoding per detected face, so the list can be
    # empty or hold several entries. Either case would break the one-row-per-file
    # layout the similarity matrix relies on.
    if len(ebds) == 0:
        raise ValueError(f"No face detected in {file_face!r}; cannot build an embedding row for it.")
    if len(ebds) > 1:
        tqdm.tqdm.write(f"{file_face}: {len(ebds)} faces detected, keeping the first one only")
    ebds_list.append(ebds[:1])
ebds_mat = np.concatenate(ebds_list, axis=0)

100%|██████████| 796/796 [03:36<00:00,  3.67it/s]


In [16]:
# The embedding loop is slow, so the matrix is reloaded from a saved copy here.
# NOTE(review): presumably the backup was produced with the np.save call below
# and then moved into backup/ - confirm.
# np.save("ebds_mat.npy", ebds_mat)
ebds_mat = np.load("backup/ebds_mat.npy")
# Row i of ebds_mat must describe files_face[i]; a size mismatch means the cache
# is stale and every index-based lookup into simils would be wrong.
assert ebds_mat.shape[0] == len(files_face), (
    f"ebds_mat has {ebds_mat.shape[0]} rows but files_face has {len(files_face)} entries"
)
# pdist(..., 'cosine') yields cosine distances; 1 - distance turns the square
# matrix into pairwise cosine similarities (diagonal == 1).
simils = 1 - squareform(pdist(ebds_mat, 'cosine'))

# Select Faces Based on Sex

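* Male: 30 face pairs (each pair is one `from` face and one `to` face)
* Female: 30 face pairs (each pair is one `from` face and one `to` face)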

In [63]:
def select_faces(files_face: list, sex: str, simils: np.ndarray, n: int = 30, seed: int = 0, threshold: float = 0.91) -> tuple:
    """Randomly pick ``n`` "from" faces of one sex and pair each with a distinct "to" face.

    Candidate faces are those whose file name starts with the character ``sex``.
    ``n`` distinct "from" faces are drawn at random; each one, in ascending index
    order, is paired with the least similar face still left in the pool of
    unused candidates. A draw is rejected (and a new one is made) when, for some
    "from" face, every remaining candidate has a similarity above ``threshold``.

    Args:
        files_face: File names; position ``i`` corresponds to row/column ``i`` of ``simils``.
        sex: Single character compared against the first character of each file name.
        simils: Square pairwise similarity matrix.
        n: Number of from/to pairs to select.
        seed: Seed for the random generator, making the selection reproducible.
        threshold: Similarity above which a candidate is considered too similar.

    Returns:
        ``(indices_from, indices_to)``: two integer arrays of length ``n`` holding
        indices into ``files_face``; ``indices_to[i]`` is the partner of ``indices_from[i]``.

    Raises:
        ValueError: If fewer than ``2 * n`` files match ``sex`` (each pair needs two distinct faces).
    """
    rng = default_rng(seed)
    # The candidate set does not depend on the random draw, so build it once.
    indices_match_sex = np.array([i for i, f in enumerate(files_face) if f[0] == sex], dtype=int)
    if len(indices_match_sex) < 2 * n:
        # Without this guard the pool would run dry and the retry loop would never end.
        raise ValueError(
            f"Need at least {2 * n} faces with sex {sex!r} to build {n} pairs, "
            f"found {len(indices_match_sex)}."
        )
    while True:
        # replace=False: the same face must not be drawn twice as a "from" face.
        indices_from = np.sort(rng.choice(indices_match_sex, size=n, replace=False))
        # remove the selected faces from the indices pool
        indices_to_pool = np.setdiff1d(indices_match_sex, indices_from)
        # Sized by n so that no uninitialised entries are returned.
        indices_to = np.empty(n, dtype=int)
        is_okay_simil = True
        for i, index_from in enumerate(indices_from):
            cur_simils = simils[index_from, indices_to_pool]
            if np.all(cur_simils > threshold):
                # Every remaining candidate is too similar: reject this draw.
                is_okay_simil = False
                break
            index_to = indices_to_pool[np.argmin(cur_simils)]
            # Each "to" face is used at most once.
            indices_to_pool = np.setdiff1d(indices_to_pool, index_to)
            indices_to[i] = index_to
        if is_okay_simil:
            return indices_from, indices_to

def sort_select_faces(indices_from: np.ndarray, indices_to: np.ndarray, simils: np.ndarray) -> np.ndarray:
    """Order the selected from/to pairs by ascending similarity.

    Pair ``k`` is ``(indices_from[k], indices_to[k])``. If the two index arrays
    differ in length, only the first ``min(len(indices_from), len(indices_to))``
    entries are treated as pairs.

    Args:
        indices_from: Indices of the "from" faces into ``simils``.
        indices_to: Indices of the "to" faces into ``simils``.
        simils: Square pairwise similarity matrix.

    Returns:
        The argsort of the per-pair similarities: element ``r`` is the pair index
        with the ``r``-th lowest similarity.
    """
    idx_from = np.asarray(indices_from, dtype=np.intp)
    idx_to = np.asarray(indices_to, dtype=np.intp)
    n_pairs = min(len(idx_from), len(idx_to))
    # Read only the n_pairs (from, to) entries instead of building the whole
    # cross matrix and taking its diagonal. This also never touches index
    # entries beyond the paired range.
    pair_simils = simils[idx_from[:n_pairs], idx_to[:n_pairs]]
    file_order = np.argsort(pair_simils)
    return file_order

In [67]:
def copy_face_pairs(indices_from, indices_to, file_order, start_number, dir_out=os.path.join("working", "face")):
    """Copy the selected from/to pairs into ``dir_out``, numbered by similarity rank.

    ``file_order`` is an argsort: ``file_order[rank]`` is the index of the pair
    with the ``rank``-th lowest similarity. That pair is written as
    ``{start_number + rank:03d}_from.tif`` and ``{start_number + rank:03d}_to.tif``,
    so file numbers increase with pair similarity.
    """
    os.makedirs(dir_out, exist_ok=True)
    for rank, i_pair in enumerate(file_order):
        number = start_number + rank
        shutil.copy(os.path.join(dir_face_origin, files_face[indices_from[i_pair]]), os.path.join(dir_out, f'{number:03d}_from.tif'))
        shutil.copy(os.path.join(dir_face_origin, files_face[indices_to[i_pair]]), os.path.join(dir_out, f'{number:03d}_to.tif'))

# Male pairs take the first block of file numbers.
indices_from_m, indices_to_m = select_faces(files_face, "M", simils)
file_order_m = sort_select_faces(indices_from_m, indices_to_m, simils)
# side effect: copy files
copy_face_pairs(indices_from_m, indices_to_m, file_order_m, start_number=1)

# Female pairs continue numbering right after the male pairs.
indices_from_f, indices_to_f = select_faces(files_face, "F", simils)
file_order_f = sort_select_faces(indices_from_f, indices_to_f, simils)
# side effect: copy files
copy_face_pairs(indices_from_f, indices_to_f, file_order_f, start_number=len(file_order_m) + 1)

# Select Novel Faces

In [68]:
# Fixed seed so the same novel faces are drawn on every run.
rng = default_rng(0)
# Gather every file already used as a "from" or "to" face ...
files_face_used = np.concatenate([
    files_face[indices_from_m],
    files_face[indices_to_m],
    files_face[indices_from_f],
    files_face[indices_to_f],
])
# ... and keep only the files that were not used (setdiff1d returns them sorted).
files_face_remain = np.setdiff1d(files_face, files_face_used)
# Draw 45 distinct positions from the remaining files.
indices_novel = rng.choice(len(files_face_remain), size=45, replace=False)
files_novel = list(files_face_remain[indices_novel])
# side effect: copy files, numbered in draw order starting at 001
for i, file_novel in enumerate(files_novel):
    path_src = os.path.join(dir_face_origin, file_novel)
    path_dst = os.path.join("working", "face", f'{i+1:03d}_novel.tif')
    shutil.copy(path_src, path_dst)