# Традиционное распознавание лиц проходит через четыре основных этапа: **обнаружение**, **выравнивание**, **извлечение признаков** и **распознавание**.

Чтобы выполнить первые три шага, мы воспользуемся библиотекой InsightFace (https://github.com/deepinsight/insightface).

In [None]:
import os
import pickle
from PIL import Image
import numpy as np
from typing import List
from tqdm import tqdm

!pip install mxnet==1.8.0.post0
!pip install -U insightface==0.2.1
!pip install onnx==1.10.1
!pip install onnxruntime==1.8.1
from sklearn.neighbors import NearestNeighbors

In [None]:
import insightface
from insightface.app import FaceAnalysis

In [None]:
!mkdir -p ~/.insightface/models/antelope/

Библиотека InsightFace уже содержит предобученные модели. Одна из таких моделей — antelope. Она подойдёт нам как для обнаружения лица на изображении, так и для нахождения эмбеддингов.
Загружаем две предварительно обученные модели для обнаружения и распознавания.

In [None]:
!unzip /content/drive/MyDrive/antelope.zip > /dev/null
!cp /content/antelope/*.onnx ~/.insightface/models/antelope/

In [None]:
# Build the InsightFace analysis pipeline from the "antelope" model pack that was
# copied into ~/.insightface/models/antelope/ in an earlier cell.
app = FaceAnalysis(name="antelope")
# det_size is the size handed to the detector.
# NOTE(review): ctx_id=0 presumably selects the first GPU — confirm against the
# InsightFace version pinned in this notebook.
app.prepare(ctx_id=0, det_size=(640, 640))

input mean and std: 127.5 127.5
find model: /root/.insightface/models/antelope/glintr100.onnx recognition
find model: /root/.insightface/models/antelope/scrfd_10g_bnkps.onnx detection
set det-size: (640, 640)


Загружаем специально отобранную часть датасета LFW:

In [None]:
!unzip /content/drive/MyDrive/lfw-micro-concat.zip > /dev/null

In [None]:
# Directory with the enrolment images; generate_embs() resolves file names against it.
DATA_DIR = '/content/lfw-micro-concat'

Вспомогательные функции:

In [None]:
def create_probe_eval_set(files: List):
    """Split *files* into one randomly chosen probe item and the remaining items.

    Args:
        files: Non-empty sequence of file names belonging to one identity.

    Returns:
        ``(probe_img_fpaths, eval_img_fpaths)``: a one-element list with the
        probe file, and a list of all other files in their original order.
    """
    # Draw a position in [0, len(files) - 1]; the upper bound of randint is exclusive.
    probe_pos = np.random.randint(0, len(files))

    eval_img_fpaths = [
        fpath for pos, fpath in enumerate(files) if pos != probe_pos
    ]
    probe_img_fpaths = [files[probe_pos]]

    return probe_img_fpaths, eval_img_fpaths

In [None]:
def generate_embs(img_fpaths: List[str]):
    """Run InsightFace on each image and collect the raw results with identity labels.

    Relies on the module-level ``app`` (InsightFace analyser) and ``DATA_DIR``.

    Args:
        img_fpaths: File names relative to ``DATA_DIR``. Names are expected to
            look like ``<identity>_<number>.<ext>``, e.g. ``Kofi_Annan_0023.jpg``.

    Returns:
        ``(embs_set, embs_label)``: two equal-length lists holding, per image,
        whatever ``app.get`` returned (empty when no face was detected) and the
        identity label parsed from the file name.
    """
    embs_set = []
    embs_label = []

    for img_fpath in img_fpaths:
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the array.
        # NOTE(review): PIL yields RGB channel order, while InsightFace examples
        # feed cv2.imread (BGR) arrays — confirm whether the order matters here.
        with Image.open(os.path.join(DATA_DIR, img_fpath)) as img:
            img_arr = np.asarray(img)

        # Keep the raw detection result; empty results are filtered out later.
        embs_set.append(app.get(img_arr))

        # Strip only the trailing "_<number>.<ext>" so the full identity is kept.
        # Splitting on the first "_" would map "George_W_Bush" and
        # "George_HW_Bush" to the same label "George".
        embs_label.append(img_fpath.rsplit("_", 1)[0])

    return embs_set, embs_label

In [None]:
def filter_empty_embs(img_set: List, img_labels: List[str]):
    """Drop images without a detected face and keep one embedding per remaining image.

    InsightFace sometimes fails to detect a face and yields an empty result for
    that image; such entries (and their labels) are removed.

    Args:
        img_set: Per-image detection results; each entry is a (possibly empty)
            sequence of face objects exposing an ``embedding`` attribute.
        img_labels: Labels aligned with ``img_set``.

    Returns:
        ``(clean_embs, clean_labels)``: two plain lists of equal length. For
        every image with at least one face, the embedding of the first face
        and the matching label.
    """
    clean_embs = []
    clean_labels = []

    # A single pass keeps embeddings and labels aligned and always returns
    # lists, whether or not anything had to be filtered out.
    for faces, label in zip(img_set, img_labels):
        if not faces:
            continue
        clean_embs.append(faces[0].embedding)
        clean_labels.append(label)

    return clean_embs, clean_labels

In [None]:
# Split every identity's photos into one probe image and an evaluation
# (enrolment) set, then run InsightFace on both.
files = sorted(os.listdir(DATA_DIR))
eval_set = []
eval_labels = []
probe_set = []
probe_labels = []
# 9 photos were taken per person from LFW (only people with 9 or more photos).
# The loop below assumes the sorted listing groups them into consecutive runs of 9.
IMAGES_PER_IDENTITY = 9
for i in tqdm(range(0, len(files), IMAGES_PER_IDENTITY)):
    # Named *_fpaths rather than `eval` so the builtin eval() is not shadowed.
    probe_fpaths, eval_fpaths = create_probe_eval_set(files[i:i + IMAGES_PER_IDENTITY])

    # Detection results and labels for the evaluation images of this identity.
    eval_set_t, eval_labels_t = generate_embs(eval_fpaths)
    eval_set.extend(eval_set_t)
    eval_labels.extend(eval_labels_t)

    # Same for the single probe image.
    probe_set_t, probe_labels_t = generate_embs(probe_fpaths)
    probe_set.extend(probe_set_t)
    probe_labels.extend(probe_labels_t)

100%|██████████| 15/15 [06:36<00:00, 26.45s/it]


In [None]:
# Sanity check: every collected detection result has a matching label.
assert len(eval_set) == len(eval_labels)
assert len(probe_set) == len(probe_labels)

In [None]:
# Drop images where no face was detected and keep the first face's embedding for the rest.
evaluation_embs, evaluation_labels = filter_empty_embs(eval_set, eval_labels)
# Note: probe_labels is rebound here to the filtered labels.
probe_embs, probe_labels = filter_empty_embs(probe_set, probe_labels)

In [None]:
# Sanity check: embeddings and labels are still aligned after filtering.
assert len(evaluation_embs) == len(evaluation_labels)
assert len(probe_embs) == len(probe_labels)

In [None]:
# Recognition is done by nearest-neighbour search with the 'cosine' distance metric.
# NearestNeighbors stores only the embeddings; labels are looked up separately
# via the returned row indices.
nn = NearestNeighbors(n_neighbors=3, metric="cosine")
nn.fit(X=evaluation_embs)

NearestNeighbors(metric='cosine', n_neighbors=3)

In [None]:
# Save the fitted neighbour index to disk.
# Only `nn` is pickled in this cell; the labels needed to interpret the
# neighbour indices are not written here.
filename = 'faceID_model.pkl'
with open(filename, 'wb') as file:
    pickle.dump(nn, file)

In [None]:
# Query the 2 nearest evaluation embeddings for every probe embedding;
# n_neighbors=2 is passed explicitly for this call.
dists, inds = nn.kneighbors(X=probe_embs, n_neighbors=2, return_distance=True)

In [None]:
# Compute precision@k: for every probe, the share of its retrieved neighbours
# (the columns of `inds`) whose label equals the probe's own label.
p_at_k = np.zeros(len(probe_embs))
for probe_pos, (true_label, neighbour_idx) in enumerate(zip(probe_labels, inds)):
    # 1 for a neighbour with the probe's label, 0 otherwise.
    hits = [int(evaluation_labels[j] == true_label) for j in neighbour_idx]
    p_at_k[probe_pos] = np.mean(hits)

# Mean over all probes (shown as the cell output).
p_at_k.mean()

1.0

In [None]:
def print_ID_results(img_fpath: str, evaluation_labels: np.ndarray, verbose: bool = False,
                     max_dist: float = 0.6):
    """Look up the face in *img_fpath* in the fitted neighbour index and print the outcome.

    Relies on the module-level ``app`` (InsightFace analyser) and ``nn``
    (fitted ``NearestNeighbors`` index).

    Args:
        img_fpath: Path of the image to identify.
        evaluation_labels: Labels aligned with the rows the index was fitted
            on; indexed with the neighbour indices returned by ``nn.kneighbors``.
        verbose: Also print the nearest neighbours when nothing matched.
            Neighbours are always printed when there is a match.
        max_dist: Largest distance (as reported by ``nn.kneighbors``) at which
            a neighbour counts as a match.

    Returns:
        None. Results are reported with ``print``.
    """
    with Image.open(img_fpath) as img:
        faces = app.get(np.asarray(img))

    # Without this guard an image with no detectable face fails with IndexError.
    if not faces:
        print(f"No face detected in {img_fpath}")
        return
    img_emb = faces[0].embedding

    # Query the three closest enrolled embeddings.
    dists, inds = nn.kneighbors(X=img_emb.reshape(1, -1), n_neighbors=3, return_distance=True)

    # Translate neighbour row indices into labels.
    pred_labels = [evaluation_labels[i] for i in inds[0]]

    has_match = any(d <= max_dist for d in dists[0])
    if has_match:
        print("Matching face(s) found in database! ")
    else:
        print("No matching face(s) found in database!")

    # Print labels and their distances: always on a match, otherwise only if asked.
    if has_match or verbose:
        for label, dist in zip(pred_labels, dists[0]):
            print(f"Nearest neighbours found in the database have labels {label} and is at a distance of {dist}")

In [None]:
# Загружаем фото, которых модель еще не видела
!unzip /content/drive/MyDrive/lfw-micro-concat-test.zip > /dev/null

In [None]:
# Directory holding the test images that are passed to print_ID_results().
TEST_DIR = '/content/lfw-micro-concat-test'

In [None]:
# Try the fitted index on the test photos. Paths are built from TEST_DIR
# instead of repeating the directory in every call.
# NOTE(review): the recorded output shows a distance of ~0 for the first two
# photos, which suggests they are also present in the enrolment set — confirm
# that the test images are really unseen.
for test_fname in (
    "George_HW_Bush_0013.jpg",
    "Britney_Spears_0007.jpg",
    "Kofi_Annan_0023.jpg",
    "Lindsay_Davenport_0020.jpg",
    "Mahmoud_Abbas_0018.jpg",
    "Mahmoud_Abbas_0019.jpg",
):
    print_ID_results(os.path.join(TEST_DIR, test_fname), evaluation_labels, verbose=True)

Matching face(s) found in database! 
Nearest neighbours found in the database have labels George and is at a distance of 1.1920928955078125e-07
Nearest neighbours found in the database have labels George and is at a distance of 0.2581465244293213
Nearest neighbours found in the database have labels George and is at a distance of 0.28309452533721924
Matching face(s) found in database! 
Nearest neighbours found in the database have labels Britney and is at a distance of 2.980232238769531e-07
Nearest neighbours found in the database have labels Britney and is at a distance of 0.21504884958267212
Nearest neighbours found in the database have labels Britney and is at a distance of 0.2667282819747925
Matching face(s) found in database! 
Nearest neighbours found in the database have labels Kofi and is at a distance of 0.15446126461029053
Nearest neighbours found in the database have labels Kofi and is at a distance of 0.16525983810424805
Nearest neighbours found in the database have labels Ko