In [1]:
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import numpy as np
import pandas as pd
import os

In [2]:
# Sanity check: ask PyTorch whether a CUDA device is usable in this kernel.
# The boolean result is shown as the cell output.
torch.cuda.is_available()

True

In [3]:
# Pick the compute device once: the first CUDA GPU when PyTorch reports one,
# otherwise the CPU. Later cells pass `device` to the models and tensors.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cuda', index=0)

In [4]:
# Print the MTCNN class documentation to review its constructor arguments
# and their default values before configuring the detector.
help(MTCNN)

Help on class MTCNN in module facenet_pytorch.models.mtcnn:

class MTCNN(torch.nn.modules.module.Module)
 |  MTCNN(
 |      image_size=160,
 |      margin=0,
 |      min_face_size=20,
 |      thresholds=[0.6, 0.7, 0.7],
 |      factor=0.709,
 |      post_process=True,
 |      select_largest=True,
 |      selection_method=None,
 |      keep_all=False,
 |      device=None
 |  )
 |
 |  MTCNN face detection module.
 |
 |  This class loads pretrained P-, R-, and O-nets and returns images cropped to include the face
 |  only, given raw input images of one of the following types:
 |      - PIL image or list of PIL images
 |      - numpy.ndarray (uint8) representing either a single image (3D) or a batch of images (4D).
 |  Cropped faces can optionally be saved to file
 |  also.
 |
 |  Keyword Arguments:
 |      image_size {int} -- Output image size in pixels. The image will be square. (default: {160})
 |      margin {int} -- Margin to add to bounding box, in terms of pixels in the final image.

In [5]:
# Face detector / cropper.
# Every value except `device` repeats the default listed in the MTCNN
# constructor signature; they are written out so they are easy to tune.
mtcnn = MTCNN(
    device=device,                # run the detector on the device chosen earlier
    image_size=160,               # output crops are square, 160 px per side
    margin=0,                     # no extra margin around the bounding box
    min_face_size=20,             # minimum face size to look for in the crowd
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
)

In [6]:
# Embedding network: build it with the "vggface2" pretrained option, switch it
# to evaluation mode (this notebook only runs inference), then move it to
# `device`.
resnet = InceptionResnetV1(pretrained="vggface2")
resnet = resnet.eval()
resnet = resnet.to(device)

In [7]:
def collate_fn(x):
    """Return the first sample of a batch instead of collating it.

    The loader below is created without a ``batch_size`` argument; with
    PyTorch's default of 1 each batch is a one-element list, so this simply
    unwraps the single ``(image, class_index)`` pair.

    Args:
        x: The list of samples the DataLoader gathered for one batch.

    Returns:
        The first element of ``x``.
    """
    return x[0]


# One sub-folder per class under the given root directory.
dataset = datasets.ImageFolder('../../datasets/test_images')

# Reverse lookup (class index -> class name), used later to label each face.
dataset.idx_to_class = {
    index: label for label, index in dataset.class_to_idx.items()
}

# num_workers=0 keeps loading in the main process.
loader = DataLoader(dataset, collate_fn=collate_fn, num_workers=0)

In [8]:
# Run the detector on every image and collect the cropped faces together with
# the class name of the folder each image came from.
aligned = []
names = []
for x, y in loader:
    # return_prob=True makes the detector also return the detection probability.
    x_aligned, prob = mtcnn(x, return_prob=True)

    # Skip images for which the detector returned no face.
    if x_aligned is None:
        continue

    print(f'face detected with probability: {prob:.8f}')
    aligned.append(x_aligned)
    names.append(dataset.idx_to_class[y])

face detected with probability: 0.99997389
face detected with probability: 0.99996948
face detected with probability: 0.99994218
face detected with probability: 0.99999821
face detected with probability: 0.99999988
face detected with probability: 0.99998868
face detected with probability: 0.99974996
face detected with probability: 0.99999833
face detected with probability: 0.99999905
face detected with probability: 0.99999487


In [9]:
# Combine the per-image face crops into one batch tensor on `device`.
# `aligned` is rebound from a list to a tensor here, so the guard skips the
# stacking step when this cell is re-run without rebuilding the list first
# (torch.stack expects a sequence of tensors and rejects a bare tensor).
if not isinstance(aligned, torch.Tensor):
    aligned = torch.stack(aligned)
aligned = aligned.to(device)

In [10]:
# Inspect the shape of the stacked face batch before feeding it to the network.
aligned.shape

torch.Size([10, 3, 160, 160])

In [11]:
# Inference only: run the forward pass under no_grad so autograd does not
# record a graph for it, then copy the result to host memory. The tensor
# produced inside no_grad does not require grad, so no detach() is needed.
with torch.no_grad():
    embeddings = resnet(aligned).cpu()

In [12]:
# Inspect the shape of the embedding tensor returned by the network.
embeddings.shape

torch.Size([10, 512])

In [13]:
# Pairwise Euclidean (L2) distance between every pair of embeddings, computed
# in one vectorised call instead of a nested Python loop with one .item() per
# pair. The compute mode forces the direct (non-matmul) formulation so each
# entry is the norm of the plain difference, as before, and self-distances
# stay at zero. .tolist() keeps `dists` a list of lists of Python floats.
dists = torch.cdist(
    embeddings,
    embeddings,
    p=2,
    compute_mode='donot_use_mm_for_euclid_dist',
).tolist()

# Label rows and columns with the class name of each face.
print(pd.DataFrame(dists, columns=names, index=names))

             Dai       Dai       Dai       Dai       Dai    Nguyen    Nguyen  \
Dai     0.000000  0.400309  0.555568  0.581336  0.600564  0.914892  0.881053   
Dai     0.400309  0.000000  0.607594  0.613061  0.721005  0.939572  0.914429   
Dai     0.555568  0.607594  0.000000  0.384019  0.484317  0.920384  0.951620   
Dai     0.581336  0.613061  0.384019  0.000000  0.356994  0.935647  0.953184   
Dai     0.600564  0.721005  0.484317  0.356994  0.000000  0.921019  0.910064   
Nguyen  0.914892  0.939572  0.920384  0.935647  0.921019  0.000000  0.492816   
Nguyen  0.881053  0.914429  0.951620  0.953184  0.910064  0.492816  0.000000   
Nguyen  0.907339  0.957075  0.992856  1.017900  0.967756  0.475349  0.588623   
Nguyen  0.981639  0.958563  1.094744  1.107791  1.087863  0.521012  0.610636   
Nguyen  0.886306  0.923264  0.997587  1.000757  0.945444  0.508669  0.645533   

          Nguyen    Nguyen    Nguyen  
Dai     0.907339  0.981639  0.886306  
Dai     0.957075  0.958563  0.923264  
Da