In [1]:
import sys
sys.path.append("../")

In [2]:
import os
import pickle
from PIL import Image
import numpy as np
from typing import List
from tqdm import tqdm

from insightface.app import FaceAnalysis
from sklearn.neighbors import NearestNeighbors

In [3]:
# Build the insightface analysis pipeline from the "antelope" model pack.
# The recorded cell output shows it resolving a detection model
# (scrfd_10g_bnkps.onnx) and a recognition model (glintr100.onnx).
app = FaceAnalysis(name="antelope")
# NOTE(review): ctx_id=0 presumably selects device 0 - confirm against the
# insightface documentation for the installed version.
app.prepare(det_size=(640, 640), ctx_id=0)

input mean and std: 127.5 127.5
find model: C:\Users\CJA/.insightface/models\antelope\glintr100.onnx recognition
find model: C:\Users\CJA/.insightface/models\antelope\scrfd_10g_bnkps.onnx detection
set det-size: (640, 640)


In [4]:
# Fixing the file extensions
# Every image "<a>.<b>" is renamed to "<a>_<b>.gif" so that it has a proper
# extension and so that the text before the first "_" can serve as its label.
YALE_DIR = "../data/yalefaces"

# Select the files to rename by name rather than by position: os.listdir()
# returns entries in arbitrary order, so "skip the first entry" is not a
# reliable way to leave the README alone.
files = [
    fname
    for fname in sorted(os.listdir(YALE_DIR))
    if os.path.isfile(os.path.join(YALE_DIR, fname))
    and not fname.startswith(".")
    and not fname.lower().endswith(".txt")
    # Names this cell has already produced (single dot, ".gif" suffix,
    # underscore-joined stem) are left alone, which makes the cell safe to
    # re-run; previously a second run appended another "_gif" to every file.
    # NOTE(review): assumes raw dataset names contain no "_" - confirm.
    and not (fname.endswith(".gif") and fname.count(".") == 1 and "_" in fname)
]

for img in files:
    new_ext_name = "_".join(img.split(".")) + ".gif"
    os.rename(os.path.join(YALE_DIR, img), os.path.join(YALE_DIR, new_ext_name))

In [5]:
def create_probe_eval_set(files: List, rng=None):
    """Split a group of files into one random probe and the remaining evaluation files.

    Args:
        files: Non-empty sequence of file names (typically all images of one
            identity). It is not modified.
        rng: Optional ``numpy.random.Generator`` used to draw the probe index,
            which makes the split reproducible. When omitted, NumPy's global
            random state is used, exactly as before.

    Returns:
        ``(probe_img_fpaths, eval_img_fpaths)``: a one-element list holding the
        randomly chosen probe file, and a list with every other file in its
        original order.

    Raises:
        ValueError: If ``files`` is empty.
    """
    if len(files) == 0:
        # np.random.randint(0, 0) would also raise ValueError, but with an
        # unhelpful "low >= high" message.
        raise ValueError("files must contain at least one entry")

    # pick random index between 0 and len(files)-1
    if rng is None:
        random_idx = np.random.randint(0, len(files))
    else:
        random_idx = int(rng.integers(0, len(files)))

    probe_img_fpaths = [files[random_idx]]
    eval_img_fpaths = [fpath for idx, fpath in enumerate(files) if idx != random_idx]

    return probe_img_fpaths, eval_img_fpaths

In [6]:
def generate_embs(img_fpaths: List[str]):
    """Run the insightface pipeline on each image and collect results and labels.

    Args:
        img_fpaths: File names relative to ``YALE_DIR``. The text before the
            first ``"_"`` in each name is used as that image's label.

    Returns:
        ``(embs_set, embs_label)``: two index-aligned lists. ``embs_set[i]`` is
        whatever ``app.get`` returned for image ``i`` (``filter_empty_embs``
        treats a falsy entry as "no embedding could be generated");
        ``embs_label[i]`` is the label derived from the file name.
    """
    embs_set = []
    embs_label = []

    for img_fpath in img_fpaths:
        # The context manager closes the file handle once the pixels are read.
        with Image.open(os.path.join(YALE_DIR, img_fpath)) as img:
            # Let Pillow perform the grayscale/palette -> RGB conversion.
            # The earlier `(img_arr * 255).astype(np.uint8)` step is only
            # correct for data in [0, 1]; for 8-bit pixel data the
            # multiplication is carried out in uint8 and wraps modulo 256,
            # corrupting the image before detection.
            # NOTE(review): assumes the GIFs decode to 8-bit data - confirm.
            rgb_arr = np.asarray(img.convert("RGB"))

        # generate Insightface embedding
        res = app.get(rgb_arr)
        # keep the raw result; empty results are filtered out later
        embs_set.append(res)
        # label = file name up to the first underscore
        embs_label.append(img_fpath.split("_")[0])

    return embs_set, embs_label
    

In [7]:
def filter_empty_embs(img_set: List, img_labels: List[str]):
    """Drop images without a usable result and return aligned embeddings and labels.

    Args:
        img_set: One entry per image. An entry is falsy (e.g. an empty list)
            when insightface could not generate an embedding; otherwise its
            first element must expose an ``embedding`` attribute.
        img_labels: Label of each image, index-aligned with ``img_set``.

    Returns:
        ``(clean_embs, clean_labels)``: two new lists of equal length holding,
        for every non-empty entry, the embedding of its first element and the
        matching label. Both are always plain lists, whether or not anything
        was filtered out.

    Raises:
        ValueError: If ``img_set`` and ``img_labels`` differ in length.
    """
    if len(img_set) != len(img_labels):
        raise ValueError(
            f"img_set has {len(img_set)} entries but img_labels has {len(img_labels)}"
        )

    clean_embs = []
    clean_labels = []
    for detections, label in zip(img_set, img_labels):
        # filtering where insightface could not generate an embedding
        if not detections:
            continue
        # only the first element of each per-image result is used
        clean_embs.append(detections[0].embedding)
        clean_labels.append(label)

    return clean_embs, clean_labels

In [8]:
# sorting files
files = os.listdir(YALE_DIR)
files.sort()

# Keep image files only, so that README.txt (or any other stray file) is
# ignored wherever it happens to sort, instead of assuming it sits at files[0].
image_files = [fname for fname in files if fname.lower().endswith(".gif")]

# Group the images by identity using the same rule generate_embs applies to
# derive labels (text before the first "_"). Slicing the sorted list in fixed
# steps of IMAGES_PER_IDENTITY mixes identities as soon as one of them has
# more or fewer files than expected.
files_by_identity = {}
for fname in image_files:
    files_by_identity.setdefault(fname.split("_")[0], []).append(fname)

eval_set = list()
eval_labels = list()
probe_set = list()
probe_labels = list()
# Nominal number of images per identity; kept for reference, no longer used
# to slice the file list.
IMAGES_PER_IDENTITY = 11

# Seed NumPy's global RNG so the random probe selection is the same on every
# Restart & Run All.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

for identity_files in tqdm(files_by_identity.values(), desc="identities"):
    # one random probe image per identity, the rest go to the evaluation set
    probe_fpaths, eval_fpaths = create_probe_eval_set(identity_files)

    # store eval embs and labels
    eval_set_t, eval_labels_t = generate_embs(eval_fpaths)
    eval_set.extend(eval_set_t)
    eval_labels.extend(eval_labels_t)

    # store probe embs and labels
    probe_set_t, probe_labels_t = generate_embs(probe_fpaths)
    probe_set.extend(probe_set_t)
    probe_labels.extend(probe_labels_t)
    

100%|██████████| 16/16 [03:34<00:00, 13.40s/it]


In [9]:
# Sanity check: every collected result must have exactly one label.
# The messages report the actual counts so a failure can be diagnosed directly.
assert len(eval_set) == len(eval_labels), (
    f"eval_set has {len(eval_set)} entries but eval_labels has {len(eval_labels)}"
)
assert len(probe_set) == len(probe_labels), (
    f"probe_set has {len(probe_set)} entries but probe_labels has {len(probe_labels)}"
)

In [10]:
# Remove every image for which insightface could not generate an embedding,
# keeping embeddings and labels aligned.
# NOTE(review): `probe_labels` is rebound to its filtered version here, so
# re-running this cell without re-running the extraction loop can pair the
# filtered labels with the unfiltered `probe_set`.
evaluation_embs, evaluation_labels = filter_empty_embs(
    img_set=eval_set,
    img_labels=eval_labels,
)
probe_embs, probe_labels = filter_empty_embs(
    img_set=probe_set,
    img_labels=probe_labels,
)

In [11]:
# Sanity check after filtering: embeddings and labels must still line up.
# The messages report the actual counts so a failure can be diagnosed directly.
assert len(evaluation_embs) == len(evaluation_labels), (
    f"{len(evaluation_embs)} evaluation embeddings vs "
    f"{len(evaluation_labels)} evaluation labels"
)
assert len(probe_embs) == len(probe_labels), (
    f"{len(probe_embs)} probe embeddings vs {len(probe_labels)} probe labels"
)