# Идентификация
Определение, кто изображен на фото

In [1]:
import os
import math
import dlib
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras_vggface.vggface import VGGFace
from keras_vggface.utils import preprocess_input
from keras import backend as K
from sklearn import neighbors
from sklearn import svm
from project.dg_face import utils, detection, alignment, recognition

K.clear_session()

In [2]:
model = VGGFace(include_top=False, model='vgg16', input_shape=(224, 224, 3), pooling='avg')

In [3]:
X_train, y_train = [], []
for person in os.listdir('train'):
    for person_img in os.listdir("train/" + person):
        path = 'train/' + person + '/' + person_img
        print(path)
        img = utils.load_image(path)
        faces = detection.detect_faces(img)
        if len(faces) != 1:
            print(path, 'was skipped because of len(faces)=', len(faces))
            continue
        face_image = alignment.aligned_face(img, faces[0])
        face_image = preprocess_input(face_image.astype(float), version=1)
        encoding = model.predict(np.array([face_image]))[0]
        X_train.append(encoding)
        y_train.append(person)
X_test, y_test = [], []
for label in os.listdir('test'):
    img = utils.load_image('test/' + label)
    X_test.append(img)
    y_test.append(label)

train/alex_lacamoire/img1.jpg
train/biden/biden.jpg
train/biden/biden2.jpg
train/kit_harington/john1.jpeg
train/kit_harington/john2.jpeg
train/obama/obama.jpg
train/obama/obama2.jpg
train/rose_leslie/img1.jpg
train/rose_leslie/img2.jpg


# Нормализация и кодирование

In [4]:
from sklearn.preprocessing import Normalizer, LabelEncoder
in_encoder = Normalizer(norm='l2')
embeddings = in_encoder.transform(X_train)

out_encoder = LabelEncoder()
labels = out_encoder.fit_transform(y_train)

## Извлечение эмбеддингов
из фотографии, которой нет в тренировочном датасете

In [5]:
image_id = 4
image = X_test[image_id]
faces = detection.detect_faces(image)
_aligned_faces = alignment.aligned_faces(image, faces)
_aligned_faces = [preprocess_input(face.astype(float), version=1) for face in _aligned_faces]
predicted_embeddings = model.predict(np.array(_aligned_faces))
predicted_embeddings = in_encoder.transform(predicted_embeddings)
print('actually this is', y_test[image_id])

actually this is obama_and_biden.jpg


# Метод ближайших соседей

### Обучение классификатора

In [6]:
knn_clf = neighbors.KNeighborsClassifier(n_neighbors=int(round(math.sqrt(len(X_train)))), algorithm='ball_tree', weights='distance')
knn_clf.fit(embeddings, labels)

### Предсказание

In [7]:
closest_distances = knn_clf.kneighbors(predicted_embeddings, n_neighbors=1)
closest_distances

(array([[0.53924444],
        [0.63278095],
        [1.01666413]]),
 array([[2],
        [6],
        [2]], dtype=int64))

In [8]:
# out_encoder.inverse_transform(knn_clf.predict(embeddings_normalized))
pred = knn_clf.predict(predicted_embeddings)
out_encoder.inverse_transform(pred)

array(['biden', 'obama', 'biden'], dtype='<U14')

In [16]:
distance_threshold = 0.7
are_matches = [closest_distances[0][i][0] <= distance_threshold for i in range(len(faces))]
# // terrible syntax
res = [out_encoder.inverse_transform([pred])[0] if rec else "unknown" for pred, rec in zip(knn_clf.predict(predicted_embeddings), are_matches)]
res

['biden', 'obama', 'unknown']

# Метод опорных векторов

### Обучение классификатора

In [17]:
clf = svm.SVC(kernel='linear', probability=True, C=15)
clf.fit(embeddings, labels)

### Предсказание

In [18]:
preds = clf.predict(predicted_embeddings)
print(out_encoder.inverse_transform(preds))

['biden' 'obama' 'biden']


In [19]:
svc_threshold = 20
are_matches = [proba[pred] * 100 <= svc_threshold for proba, pred in zip(clf.predict_proba(embeddings), preds)]
res = [out_encoder.inverse_transform([pred])[0] if rec else "unknown" for pred, rec in zip(clf.predict(predicted_embeddings), are_matches)]
res

['biden', 'obama', 'unknown']

# Добавление нового лица

In [20]:
img = utils.load_image('Thomas-Shelby-Train.jpg')
faces = detection.detect_faces(img)
if len(faces) != 1:
    raise Exception('was skipped because of len(faces)=%s' % len(faces))
face_image = alignment.aligned_face(img, faces[0])
face_image = preprocess_input(face_image.astype(float), version=1)
train_encoding = model.predict(np.array([face_image]))
train_encoding = in_encoder.transform(train_encoding)

img = utils.load_image('Thomas-Shelby-Test.jpg')
faces = detection.detect_faces(img)
if len(faces) != 1:
    raise Exception('was skipped because of len(faces)=%s' % len(faces))
face_image = alignment.aligned_face(img, faces[0])
face_image = preprocess_input(face_image.astype(float), version=1)
test_encoding = model.predict(np.array([face_image]))
test_encoding = in_encoder.transform(test_encoding)



In [21]:
# заново обучим энкодер идентификаторов, включая имя нового человека
labels = out_encoder.fit_transform([*y_train, 'thomas_shelby'])
embeddings = in_encoder.transform([*X_train, *train_encoding])

In [24]:
clf.fit(embeddings, labels)
knn_clf.fit(embeddings, labels)

In [26]:
preds = clf.predict(test_encoding)
print("svc:", out_encoder.inverse_transform(preds))
preds = clf.predict(test_encoding)
print("knn:", out_encoder.inverse_transform(preds))

svc: ['thomas_shelby']
knn: ['thomas_shelby']
