# Getting Embeddings

In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import numpy as np
import pandas as pd
import torch.nn as nn
import cv2
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image

# Inference with the pre-trained networks is done on the CPU; no GPU is needed.
device = torch.device('cpu')
print('Running on device: {}'.format(device))

# Build the MTCNN face detector.
# MTCNN bundles several neural nets plus other code, so the target device is
# handed to the constructor, which lets it copy objects internally when needed.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    prewhiten=True,
    device=device,
)
# Cosine similarity of two vectors 'a' and 'b'.
# Follows from the dot-product identity: a . b = |a| * |b| * cos(angle).
def cos_sim(a, b):
    """Return the dot product of 'a' and 'b' divided by the product of their norms."""
    magnitude_product = np.linalg.norm(a) * np.linalg.norm(b)
    return np.dot(a, b) / magnitude_product

# cos_sim can be negative, and negative values are interpreted differently.
# This wrapper rescales the result so that only non-negative values come back.
def cos(a, b):
    """Linearly map cos_sim(a, b) from the range [-1, 1] onto [0, 1]."""
    lower, upper = -1, 1
    span = upper - lower
    shifted = cos_sim(a, b) - lower
    return shifted / span

# Define the Inception Resnet V1 module with VGGFace2 weights, in eval mode.
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Define a dataset and data loader.
# idx_to_class inverts class_to_idx so a label index can be mapped back to its
# class (folder) name.
dataset = datasets.ImageFolder('Film/Test')
dataset.idx_to_class = {i: c for c, i in dataset.class_to_idx.items()}
# No batch_size is given (DataLoader defaults to 1) and the collate function
# unwraps that single (image, label) pair, so the loop sees one sample at a time.
loader = DataLoader(dataset, collate_fn=lambda x: x[0])

# Perform MTCNN facial detection.
# For every image in which a face is found, print the detection probability
# and keep the aligned face tensor together with the class name of the image.
aligned = []
names = []
for x, y in loader:
    x_aligned, prob = mtcnn(x, return_prob=True)
    if x_aligned is not None:
        print('Face detected with probability: {:8f}'.format(prob))
        aligned.append(x_aligned)
        names.append(dataset.idx_to_class[y])

# torch.stack cannot stack an empty list, so fail with a readable message.
if not aligned:
    raise RuntimeError("No face was detected in any image under 'Film/Test'")

# Calculate the face embeddings, one row per detected face.
# This is inference only, so no autograd graph is recorded.
aligned = torch.stack(aligned).to(device)
with torch.no_grad():
    embeddings = resnet(aligned).cpu()

# Print the similarity matrix for the classes.
# NOTE: this deliberately rebinds the module-level name 'cos_sim' to the torch
# implementation. cos() looks 'cos_sim' up at call time, so from here on it
# operates on tensors and returns tensors (hence the .item() below).
cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)
dists = [[cos(e1, e2).item() for e2 in embeddings] for e1 in embeddings]

# The table below is helpful for analysing the results and for choosing the
# value of the recognition threshold.
print(pd.DataFrame(dists, columns=names, index=names))


# Face Recognition from Images

In [None]:
from facenet_pytorch import MTCNN, InceptionResnetV1,extract_face
from PIL import Image,ImageDraw
import torch
import cv2
import torch.nn as nn

# Takes two vectors 'a' and 'b'.
# Gives back their cosine similarity, derived from the dot-product definition.
def cos_sim(a, b):
    """Return dot(a, b) divided by the product of the norms of 'a' and 'b'."""
    length_a, length_b = np.linalg.norm(a), np.linalg.norm(b)
    denominator = length_a * length_b
    return np.dot(a, b) / denominator

# cos_sim may return negative numbers, which carry a different interpretation.
# This helper shifts and scales the value so the result is never negative.
def cos(a, b):
    """Rescale cos_sim(a, b) linearly from [-1, 1] to [0, 1]."""
    floor, ceiling = -1, 1
    raw = cos_sim(a, b)
    return (raw - floor) / (ceiling - floor)


#This is the function for doing face recognition.
def verify(embedding, threshold=0.85):
    """Label every detected face that matches a known embedding.

    Each entry of the module-level 'embeddings' is compared with each entry
    of 'embedding' using cos(). Whenever the score exceeds 'threshold', the
    corresponding entry of the module-level 'names' is printed and drawn onto
    the module-level image 'im', below the matching entry of 'boxes'.

    Args:
        embedding: iterable of face embeddings; the j-th one is paired with
            boxes[j].
        threshold: minimum cos() score that counts as a match. The default
            of 0.85 was chosen after inspecting the similarity table printed
            in the previous cell.
    """
    for known_idx, known in enumerate(embeddings):
        for face_idx, candidate in enumerate(embedding):
            # Rescaled cosine similarity between a known face and a detected face.
            similarity = cos(known, candidate)

            if similarity > threshold:
                # The name of the identified person is printed on the screen
                # and written below the rectangular box of the detected face.
                text = names[known_idx]
                box = boxes[face_idx]
                # Plain Python ints for the text origin: box[0] as x, and
                # box[3] shifted 17 pixels further down as y.
                origin = (int(box[0]), int(box[3]) + 17)
                cv2.putText(im, text, origin, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255,255,255), 2)
                print(text)
                
#Model running on CPU
device = torch.device('cpu')

#Define the Inception Resnet V1 module with VGGFace2 weights, in eval mode.
#It is moved to the same 'device' object that is handed to MTCNN below.
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

#Define MTCNN module
#Since MTCNN is a collection of neural nets and other code,
#the device must be passed in the following way to enable copying of objects when needed internally.
#'keep_all' is kept True. All the faces present in the image will be detected.
mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, prewhiten=True,
    device=device, keep_all=True
)

#Get the cropped and prewhitened face tensors of the PIL image,
#together with the bounding boxes of the detected faces.
image_path = 'Film/Test/1.jpg'
img = Image.open(image_path)
img_cropped = mtcnn(img)
boxes, prob = mtcnn.detect(img)

#Without this check a picture with no detectable face would fail further down
#with an unrelated error when the missing boxes are iterated.
if img_cropped is None or boxes is None:
    raise RuntimeError('No face detected in {}'.format(image_path))

img_draw = img.copy()
draw = ImageDraw.Draw(img_draw)

#Rectangular boxes are drawn on faces present in the image.
#The detected and cropped faces are then saved.
for i, box in enumerate(boxes):
    draw.rectangle(box.tolist())
    extract_face(img, box, save_path='Film/Test/Cropped_Face_{}.jpg'.format(i))
img_draw.save('Film/Test/Faces_Detected.jpg')

#Reload the annotated picture with OpenCV; verify() writes the names onto 'im'.
im = cv2.imread('Film/Test/Faces_Detected.jpg')

#Calculate embeddings of each cropped face and print them.
#This is inference only, so no autograd graph is recorded.
with torch.no_grad():
    img_embedding = resnet(img_cropped)
print(img_embedding)

#Print the size of img_embedding.
print(img_embedding.size())

#Call function verify.
#Identify the person with the help of embeddings.
#NOTE: this deliberately rebinds the module-level name 'cos_sim' to the torch
#implementation; cos() looks it up at call time, so verify() compares tensors.
cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)
verify(img_embedding)

#'Image' window opens.
#The image now has rectangular boxes on detected faces.
#The identified faces have their respective name below the box.
cv2.imshow("Image", im)
k = cv2.waitKey(0)

#13 is for 'Enter' key.
#If 'Enter' key is pressed, all the windows are made to close forcefully.
if k == 13:
    cv2.destroyAllWindows()
    