# Facial Recognition (RUN ALL)

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Layer

In [None]:
# Move into the dataset directory. The relative paths used further down
# (the weights file, face_recognition_dataset/, models/, detr/) are resolved
# against the working directory, so this call must run first.
os.chdir("../input/facialrecognitiont10/")  
%pwd
# Project-local modules; presumably found via the new working directory.
from triplet_loss import TripletLossLayer
from lfw_preprocessor import LfwDataGenerator

In [None]:
# Step into models/face_recognition just long enough to import `align` and
# `model`, then go back up two levels to the directory we started from.
os.chdir("models/face_recognition")
from align import AlignDlib
from model import create_model
os.chdir("../..")

In [None]:
# Three 96x96x3 image inputs; the names suggest the anchor / positive /
# negative members of a triplet (presumably — confirm against TripletLossLayer).
in_a = Input(shape=(96, 96, 3), name="img_a")
in_p = Input(shape=(96, 96, 3), name="img_p")
in_n = Input(shape=(96, 96, 3), name="img_n")

# A single model instance is applied to all three inputs, so the three
# branches share the same layers.
model_sm = create_model()

emb_a = model_sm(in_a)
emb_p = model_sm(in_p)
emb_n = model_sm(in_n)

# TripletLossLayer is project-local; alpha=0.2 is presumably the triplet
# margin — TODO confirm in triplet_loss.py.
triplet_loss_layer = TripletLossLayer(alpha=0.2, name='triplet_loss_layer')([emb_a, emb_p, emb_n])

# Wrap the three inputs and the loss layer output into one model, then load
# previously saved weights into it.
facial_rec_model = Model([in_a, in_p, in_n], triplet_loss_layer)
facial_rec_model.load_weights("epoch097_loss0.176.hdf5")
facial_rec_model.summary()

# Keep a handle to the layer at index 3; the rest of the file calls
# .predict() on it with single images to obtain embeddings.
# NOTE(review): index 3 looks like the shared `model_sm` (it follows the three
# Input layers) — confirm against the summary printed above.
facial_rec_base_model = facial_rec_model.layers[3]

In [None]:
def set_base_embeddings(num_faces=49, dataset_dir='face_recognition_dataset'):
    """Compute one embedding per reference image ``<dataset_dir>/<i>.png``.

    Each image is loaded, aligned, scaled to the [0, 1] range and passed
    through ``facial_rec_base_model``.

    Args:
        num_faces: Number of reference images; files ``0.png`` through
            ``<num_faces - 1>.png`` are read. Defaults to 49, the value that
            was previously hard-coded.
        dataset_dir: Directory holding the reference images, relative to the
            current working directory.

    Returns:
        A float array of shape ``(num_faces, 128)``; row ``i`` holds the
        embedding computed from ``<i>.png``.
    """
    embeddings = np.empty((num_faces, 128))
    for i in range(num_faces):
        path = dataset_dir + '/' + str(i) + ".png"
        print("now processing: " + path)
        img = align_image(load_image(path))
        img = img.astype('float32') / 255.0
        # The model expects a batch dimension in front.
        img = np.expand_dims(img, axis=0)
        embeddings[i] = facial_rec_base_model.predict(img)

    return embeddings

import csv
from numpy import asarray
def name_mapping():
    """Read the person-id to person-name table shipped with the dataset.

    Returns:
        A list with one ``{"id": int, "name": str}`` dict per CSV row, in
        file order. ``id`` comes from the ``person_id`` column and ``name``
        from the ``person_name`` column.
    """
    with open('face_recognition_dataset/person_id_name_mapping.csv') as mapping_file:
        return [
            {"id": int(record["person_id"]), "name": record["person_name"]}
            for record in csv.DictReader(mapping_file)
        ]

def preprocess(img, box):
    """Crop one detected face from a PIL image and prepare it for the embedder.

    The reference embeddings are computed from images that ``load_image``
    returns in RGB order, so the crop must be RGB as well. PIL already
    decodes to RGB; the previous channel reversal here turned the crop into
    BGR and made probe and reference embeddings inconsistent.

    Args:
        img: PIL image the box was detected in.
        box: Four values ``(xmin, ymin, xmax, ymax)`` in pixel coordinates;
            each is truncated to an int before cropping.

    Returns:
        A float32 array with a leading batch axis of size 1 and values
        scaled to the [0, 1] range.
    """
    face = img.crop((int(box[0]), int(box[1]), int(box[2]), int(box[3])))
    # convert('RGB') also normalises grayscale / RGBA / palette inputs to
    # three channels; np.array gives a writable, contiguous copy.
    face = np.array(face.convert('RGB'))
    face = align_image(face)
    face = face.astype('float32') / 255.0
    return np.expand_dims(face, axis=0)

def distance(emb1, emb2):
    """Return the squared Euclidean distance between two embeddings.

    The operands are subtracted with ordinary broadcasting and every element
    of the squared difference is summed into a single scalar.
    """
    delta = emb1 - emb2
    return np.square(delta).sum()

def infer(face):
    """Return the name of the reference person closest to ``face``.

    The embedding of ``face`` is compared against every row of the
    module-level ``base_embeddings`` array (the previous code referenced an
    undefined ``embeddings`` global and a hard-coded count of 49). The row
    with the smallest squared distance wins; ties go to the lowest index.

    Args:
        face: Preprocessed image batch, as produced by ``preprocess``.

    Returns:
        The ``"name"`` of the entry in ``names`` whose ``"id"`` equals the
        index of the closest reference embedding.

    Raises:
        StopIteration: If no entry in ``names`` has that id.
    """
    face_embed = facial_rec_base_model.predict(face)
    dists = [distance(face_embed, reference) for reference in base_embeddings]
    # np.argmin returns the first minimum, matching a strict "<" scan.
    min_index = int(np.argmin(dists))
    return next(item for item in names if item["id"] == min_index)["name"]

_FACE_ALIGNER = None


def _get_face_aligner():
    """Return the shared AlignDlib instance, creating it on first use."""
    global _FACE_ALIGNER
    if _FACE_ALIGNER is None:
        # The relative path is resolved against the working directory at the
        # time of the first call.
        _FACE_ALIGNER = AlignDlib('models/landmarks.dat')
    return _FACE_ALIGNER


def align_image(img):
    """Return a 96x96 version of ``img`` aligned on its largest detected face.

    The aligner is built once and reused; previously a new ``AlignDlib`` was
    constructed (presumably re-reading the landmark model from disk) on
    every call.

    Args:
        img: Image array containing a face.

    Returns:
        The output of ``AlignDlib.align`` for the largest face bounding box,
        or, when no face is found, ``img`` simply resized to 96x96.
    """
    alignment = _get_face_aligner()
    bb = alignment.getLargestFaceBoundingBox(img)
    if bb is None:
        # No face found: fall back to a plain resize so callers still get
        # an input of the expected size.
        return cv2.resize(img, (96, 96))
    return alignment.align(96,
                           img,
                           bb,
                           landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE)
def load_image(path):
    """Read an image file with OpenCV and return it in RGB channel order.

    Args:
        path: Path of the image file.

    Returns:
        The image array with its last axis reversed (BGR -> RGB).

    Raises:
        OSError: If OpenCV cannot read the file. ``cv2.imread`` signals this
            by returning ``None`` rather than raising, which previously
            surfaced as an unrelated ``TypeError`` on the slicing below.
    """
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    if img is None:
        raise OSError(f"Could not read image file: {path!r}")
    # OpenCV loads images with color channels
    # in BGR order. So we need to reverse them
    return img[..., ::-1]

# DETR Model

In [None]:
# Enter the detr/ directory; the next cell imports `util.misc` from here.
os.chdir('detr')

In [None]:
import argparse
import random
from pathlib   import Path
import numpy as np
import torch
import torchvision.transforms as T
import matplotlib.pyplot as plt
import PIL.Image
# Imported while the working directory is detr/.
import util.misc as utils
# Step up one level so that `detr` itself can be imported as a package.
os.chdir('..')
import detr.models
from detr.models import build_model
!pip install pycocotools
# Back into detr/ for the remaining imports (`main` lives there).
os.chdir('detr')
%pwd 
%ls 
import pycocotools
from main import get_args_parser

In [None]:
# Build the argument namespace from DETR's own parser. An empty argument
# list is parsed, so every option keeps its default unless overridden below.
parser = argparse.ArgumentParser(description='DETR args parser', parents=[get_args_parser()])
args = parser.parse_args(args=[])
# Path of the newly trained face detection checkpoint; it is only recorded
# here and is actually loaded further down, once the model has been built.
args.resume = 'checkpoint.pth'
# Run the detector on the CPU.
args.device = 'cpu'

# Create the output directory only when one is configured.
if args.output_dir:
    Path(args.output_dir).mkdir(parents=True, exist_ok=True)
args.distributed = False
print(args)

In [None]:
# build_model returns the detector together with a criterion and
# postprocessors; in the code visible here only the detector is used.
face_detection_model, criterion, postprocessors = build_model(args)
# Move the detector to the device selected in args ('cpu' above).
device = torch.device(args.device)
face_detection_model.to(device)

In [None]:
# Leave detr/ and return to its parent directory; the relative paths used
# below ('checkpoint.pth', face_recognition_dataset/, models/) resolve there.
os.chdir('..')

In [None]:
output_dir = Path(args.output_dir)
# Restore the trained detector weights, either from a URL or a local file.
if args.resume:
    if args.resume.startswith('https'):
        checkpoint = torch.hub.load_state_dict_from_url(
            args.resume, map_location='cpu', check_hash=True)
    else:
        checkpoint = torch.load(args.resume, map_location='cpu')
    # strict=True: every key must match, so a wrong checkpoint fails loudly.
    face_detection_model.load_state_dict(checkpoint["model"], strict=True)

# Modules are created in training mode. The detector is only used for
# inference in this notebook, so switch it to evaluation mode; otherwise
# train-time layers such as dropout stay active and make the detections
# non-deterministic.
face_detection_model.eval()

In [None]:
# Label names, indexed by the predicted class id (the plotting helpers look
# up CLASSES[argmax of the class probabilities]).
CLASSES = ['none', 'person']
# colors for visualization; the plotting helpers cycle through these, one
# RGB triple per drawn box
COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
          [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]]

# standard PyTorch mean-std input image normalization, applied after the
# PIL image has been converted to a tensor
transform = T.Compose([
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# for output bounding box post-processing
def box_cxcywh_to_xyxy(x):
    """Convert boxes from (center x, center y, width, height) to corner form.

    Args:
        x: Tensor whose dimension 1 holds the four box components.

    Returns:
        A tensor of the same layout holding (xmin, ymin, xmax, ymax).
    """
    center_x, center_y, width, height = x.unbind(1)
    half_w = 0.5 * width
    half_h = 0.5 * height
    corners = (center_x - half_w, center_y - half_h,
               center_x + half_w, center_y + half_h)
    return torch.stack(corners, dim=1)

def rescale_bboxes(out_bbox, size):
    """Convert center-format boxes to corner format scaled by the image size.

    Args:
        out_bbox: Boxes in (center x, center y, width, height) form.
        size: ``(width, height)`` pair; x coordinates are multiplied by the
            width and y coordinates by the height.

    Returns:
        Boxes as (xmin, ymin, xmax, ymax) after scaling.
    """
    width, height = size
    corners = box_cxcywh_to_xyxy(out_bbox)
    scale = torch.tensor([width, height, width, height], dtype=torch.float32)
    return corners * scale


In [None]:
def detect(im, model, transform):
    """Run a detector on one PIL image and return its confident detections.

    Args:
        im: PIL image; its ``size`` is used to scale the boxes to pixels.
        model: Detection model. It is called on a batch of one image and must
            return a dict with ``'pred_logits'`` and ``'pred_boxes'``. (The
            previous code ignored this argument and always used the global
            ``face_detection_model``.)
        transform: Callable turning ``im`` into an image tensor.

    Returns:
        ``(probas, boxes)``: the class probabilities (last class column
        dropped) of every prediction whose best probability exceeds 0.7, and
        the matching boxes as (xmin, ymin, xmax, ymax) in pixel coordinates.
    """
    img = transform(im).unsqueeze(0)
    assert img.shape[-2] <= 1600 and img.shape[-1] <= 1600, 'demo model only supports images up to 1600 pixels on each size'

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(img)

    # Softmax over classes for the single batch item, dropping the last column.
    probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > 0.7
    bboxes_scaled = rescale_bboxes(outputs['pred_boxes'][0, keep], im.size)
    return probas[keep], bboxes_scaled

In [None]:
def plot_results(pil_img, prob, boxes, classes):
    """Show ``pil_img`` with a labelled rectangle per accepted detection.

    A detection is drawn only when the label of its most probable class is
    contained in ``classes``; the caption is that label followed by its
    probability.
    """
    plt.figure(figsize=(16, 10))
    plt.imshow(pil_img)
    axes = plt.gca()
    detections = zip(prob, boxes.tolist(), COLORS * 100)
    for class_probs, (left, top, right, bottom), colour in detections:
        best = class_probs.argmax()
        label = CLASSES[best]
        if label in classes:
            frame = plt.Rectangle((left, top), right - left, bottom - top,
                                  fill=False, color=colour, linewidth=3)
            axes.add_patch(frame)
            caption = f'{label}: {class_probs[best]:0.2f}'
            axes.text(left, top, caption, fontsize=15,
                      bbox=dict(facecolor='yellow', alpha=0.5))

    plt.axis('off')
    plt.show()

# Driver (RUN ALL)


In [None]:
def plot_predictions(pil_img, prob, boxes, classes, matched_people):
    """Show ``pil_img`` with each accepted detection captioned by its match.

    A detection is drawn only when the label of its most probable class is
    contained in ``classes``; the caption is the corresponding entry of
    ``matched_people``.
    """
    plt.figure(figsize=(16, 10))
    plt.imshow(pil_img)
    axes = plt.gca()
    detections = zip(prob, boxes.tolist(), COLORS * 100, matched_people)
    for class_probs, (left, top, right, bottom), colour, person in detections:
        best = class_probs.argmax()
        if CLASSES[best] in classes:
            frame = plt.Rectangle((left, top), right - left, bottom - top,
                                  fill=False, color=colour, linewidth=3)
            axes.add_patch(frame)
            axes.text(left, top, person, fontsize=15,
                      bbox=dict(facecolor='yellow', alpha=0.5))

    plt.axis('off')
    plt.show()

In [None]:
def predict_names(boxes, img_letter):
    """Greedily assign reference identities to the detected boxes of one image.

    Reads the module-level ``base_embeddings``, ``pp_face_embeds`` and
    ``names``, and records every assignment in the module-level
    ``submission_dict`` under the key ``"<img_letter>_<person id>"``.

    Each reference person, in id order, claims the closest box that is either
    free or currently held by a person who is farther away. A person who
    loses a box this way has their ``submission_dict`` entry removed and is
    not re-assigned.

    Args:
        boxes: Detected boxes for the image, one per entry of
            ``pp_face_embeds``.
        img_letter: Prefix identifying the image in ``submission_dict`` keys.

    Returns:
        A list with one entry per box: the matched person's name, or ``None``
        when nobody claimed that box.
    """
    # Per box: the assigned name, and (distance, person id) of the assignment.
    prediction_names = [None] * len(boxes)
    prediction_distances = [None] * len(boxes)

    # Nothing detected: min() below would raise on an empty list.
    if not pp_face_embeds:
        return prediction_names

    # Marks a box this person has already been refused; infinity can never
    # collide with a real distance.
    unavailable = float('inf')

    for i, face_embed in enumerate(base_embeddings):
        distances = [distance(face_embed, probe) for probe in pp_face_embeds]
        while min(distances) != unavailable:
            dist = min(distances)
            closestIndex = distances.index(dist)
            current = prediction_distances[closestIndex]
            if current is None or current[0] > dist:
                if current is not None:
                    # Evict the previous, more distant owner of this box.
                    del submission_dict[img_letter + "_" + str(current[1])]
                prediction_distances[closestIndex] = (dist, i)
                prediction_names[closestIndex] = next(item for item in names if item["id"] == i)["name"]
                submission_dict[img_letter + "_" + str(i)] = boxes[closestIndex].detach().numpy()
                break
            # Box is held by a closer person; try this person's next-best box.
            distances[closestIndex] = unavailable
    return prediction_names

In [None]:
# Person id -> name table read from the dataset CSV.
names = name_mapping()
#line below will take a while, that's normal
# (every reference image is loaded, aligned and run through the embedder)
base_embeddings = set_base_embeddings()

In [None]:
url1 = '../c/2021-spring-coml-face-recognition-competition/a.jpg'
url2 = '../c/2021-spring-coml-face-recognition-competition/b.jpg'
url3 = '../c/2021-spring-coml-face-recognition-competition/c.jpg'
url4 = '../c/2021-spring-coml-face-recognition-competition/d.jpg'
plot_classes = ["person"]

# Filled by predict_names: "<image letter>_<person id>" -> box coordinates.
submission_dict = {}

# Same pipeline for each competition image: detect faces, embed each crop,
# match the crops against the reference embeddings, then plot the result.
for img_letter, image_url in (("a", url1), ("b", url2), ("c", url3), ("d", url4)):
    the_image = PIL.Image.open(image_url)
    scores, boxes = detect(the_image, face_detection_model, transform)
    # predict_names reads this module-level list, so it has to be rebuilt
    # for the current image before the call below.
    pp_face_embeds = [
        facial_rec_base_model.predict(preprocess(the_image, box))
        for box in boxes
    ]
    predictions = predict_names(boxes, img_letter)
    plot_predictions(the_image, scores, boxes, plot_classes, predictions)

In [None]:
import csv

os.chdir("../../working")
# Write one row per id listed in the sample submission, using the sample's
# column layout. Ids without a matched box get an all-zero box.
with open('../input/c/2021-spring-coml-face-recognition-competition/kaggle_sample_submission.csv', newline="") as sample_submission_csv:
    with open("submission.csv", "w", newline="") as submission_csv:
        sample_csv_reader = csv.DictReader(sample_submission_csv)
        submission_csv_writer = csv.DictWriter(submission_csv, sample_csv_reader.fieldnames)
        submission_csv_writer.writeheader()
        for row in sample_csv_reader:
            # Stored boxes are ordered (xmin, ymin, xmax, ymax); the previous
            # code wrote index 1 as "xmax" and index 2 as "ymin".
            xmin, ymin, xmax, ymax = submission_dict.get(row["id"], (0, 0, 0, 0))
            submission_csv_writer.writerow({"id": row["id"],
                                            "xmin": xmin,
                                            "xmax": xmax,
                                            "ymin": ymin,
                                            "ymax": ymax})

In [None]:
# List the parent of the current working directory.
ls ..