## Install FaceNet

**With pip:**
`!sudo pip install facenet-pytorch`

**or clone the repository into a directory named `facenet_pytorch` (with '_' instead of '-', so that Python can import it):**

`!sudo git clone https://github.com/timesler/facenet-pytorch.git facenet_pytorch`

**or use a docker container (see https://github.com/timesler/docker-jupyter-dl-gpu):**

`!sudo docker run -it --rm timesler/jupyter-dl-gpu pip install facenet-pytorch && ipython`

## Get data
`!sudo wget https://download.openmmlab.com/datasets/movienet/poster4M.img_meta.v1.json`

In [None]:
import json
import shutil
from pathlib import Path

from imdb import IMDb
import requests

# Download a subsample of single-person 'event' images from the MovieNet
# poster metadata into ./data/<person name>/<image id>.jpg and record the
# metadata of every saved image in SUBSAMPLE_PATH.

META_PATH = 'poster4M.img_meta.v1.json'
SUBSAMPLE_PATH = 'subsample10K_meta.json'
MAX_IMAGES = 1000       # stop once this many images have been saved
REQUEST_TIMEOUT = 30    # seconds; without a timeout a stalled request hangs forever


def save_subsample(meta, path=SUBSAMPLE_PATH):
    """Write the metadata of the images saved so far to `path` as JSON."""
    with open(path, 'w') as out_file:
        json.dump(meta, out_file, indent=6)


with open(META_PATH) as meta_file:
    img_meta = json.load(meta_file)
ia = IMDb()

# Start from an empty ./data directory so images from earlier runs never mix in.
p = Path.cwd() / 'data'
shutil.rmtree(p, ignore_errors=True)
p.mkdir(parents=True, exist_ok=True)

subsample = {}
for image, meta in img_meta.items():
    # Keep only images of type 'event' that list exactly one cast member.
    if meta['type'] != 'event' or len(meta['cast']) != 1:
        continue

    try:
        r = requests.get(meta['url'], timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # Checkpoint what has been collected so far, then skip this image.
        # (Falling through here would reuse a stale or undefined response.)
        save_subsample(subsample)
        continue

    img = r.content
    # Skip error responses so that error pages are never stored as .jpg files.
    if not r.ok or img == b'Not Found':
        continue

    # Drop the first two characters of the cast id (presumably the 'nm'
    # prefix -- TODO confirm) before querying IMDb. The lookup runs only after
    # a successful download so no query is wasted on images that are skipped.
    name_id = meta['cast'][0][2:]
    name = ia.get_person(name_id)['name']

    folder = p / name
    folder.mkdir(parents=True, exist_ok=True)
    filepath = folder / f'{image}.jpg'
    with filepath.open('wb') as out_file:
        out_file.write(img)
    subsample[image] = meta

    if len(subsample) >= MAX_IMAGES:
        break

save_subsample(subsample)

## Face detection

In [1]:
from pathlib import Path

from facenet_pytorch import MTCNN, InceptionResnetV1
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
def collate_fn(x):
    """Return the first element of the batch `x` unchanged.

    Passed to the DataLoader so that each iteration yields the raw sample
    at index 0 of the batch instead of a collated batch.
    """
    first_sample = x[0]
    return first_sample

# Detect one face per image with MTCNN, embed the first MAX_FACES detected
# faces with InceptionResnetV1 and print the pairwise L2 distances.

MAX_FACES = 10  # number of detected faces to collect before stopping

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# NOTE(review): this cell reads ./data_duplicated, while the download cell of
# this notebook writes to ./data -- confirm which folder is intended.
dataset = datasets.ImageFolder(Path.cwd() / 'data_duplicated')
# Inverse of class_to_idx, used to map numeric labels back to folder names.
dataset.idx_to_class = {idx: cls for cls, idx in dataset.class_to_idx.items()}
loader = DataLoader(dataset, collate_fn=collate_fn)

# If required, create a face detection pipeline using MTCNN:
mtcnn = MTCNN(image_size=160, margin=32, device=device)

# Create an inception resnet (in eval mode):
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

aligned = []
names = []

for x, y in loader:
    x_aligned = mtcnn(x)
    if x_aligned is None:
        # mtcnn returned nothing for this image; skip it.
        continue
    aligned.append(x_aligned)
    names.append(dataset.idx_to_class[y])
    if len(aligned) == MAX_FACES:
        break

if not aligned:
    # torch.stack([]) would fail with a much less helpful message.
    raise RuntimeError('No faces were detected in the dataset images.')

aligned = torch.stack(aligned).to(device)
# Inference only: no_grad avoids building an autograd graph.
with torch.no_grad():
    embeddings = resnet(aligned).cpu()

# Pairwise L2 distance between every pair of embeddings.
dists = [[(e1 - e2).norm().item() for e2 in embeddings] for e1 in embeddings]
print(pd.DataFrame(dists, columns=names, index=names))

In [21]:
# Recompute the embeddings and the pairwise distance table from the faces
# collected by the detection cell (uses `aligned`, `names` and `resnet`).

# `aligned` may still be a list of face tensors or may already have been
# stacked by an earlier run; only stack when needed so re-running is safe.
if not torch.is_tensor(aligned):
    aligned = torch.stack(aligned)

# Run on whatever device the model lives on instead of hard-coding one.
device = next(resnet.parameters()).device
aligned = aligned.to(device)

# Inference only: no_grad avoids building an autograd graph.
with torch.no_grad():
    embeddings = resnet(aligned).cpu()

# Pairwise L2 distance between every pair of embeddings.
dists = [[(e1 - e2).norm().item() for e2 in embeddings] for e1 in embeddings]
print(pd.DataFrame(dists, columns=names, index=names))

           nm0000006  nm0000006  nm0000007  nm0000007  nm0000008  nm0000008  \
nm0000006   0.000000   1.366138   1.090984   1.152431   1.230796   1.392182   
nm0000006   1.366138   0.000000   1.368804   1.332926   1.372383   1.377241   
nm0000007   1.090984   1.368804   0.000000   1.123659   1.151212   1.385094   
nm0000007   1.152431   1.332926   1.123659   0.000000   1.156646   1.435622   
nm0000008   1.230796   1.372383   1.151212   1.156646   0.000000   1.353558   
nm0000008   1.392182   1.377241   1.385094   1.435622   1.353558   0.000000   
nm0000009   1.258769   1.276305   1.336190   1.171455   1.308255   1.314605   
nm0000011   0.961303   1.305481   1.333975   1.214338   1.162395   1.238129   
nm0000011   1.096124   1.427237   1.240264   1.298536   1.464159   1.298635   
nm0000012   1.396839   1.392042   1.388325   1.194588   1.355340   1.276682   

           nm0000009  nm0000011  nm0000011  nm0000012  
nm0000006   1.258769   0.961303   1.096124   1.396839  
nm0000006   1.2763