# Face Identification

안면 인식 관련하여 크게, Face Detection과 Face Identification으로 나뉜다. 둘은 비슷하지만 다르다.

## Import Libraries

현재 이 코드가 적힌 파일 위치에 facenet_pytorch이라는 폴더가 있고, 이를 모듈처럼 불러와 사용하는데,

이것도 `__init__.py`라는 파일이 있어야 이렇게 모듈처럼 불러올 수 있고, 동일 폴더 내에 있어야 하는 등 여러 조건이 있다.

In [None]:
# 구글이 공개한 facenet이라는 AI 모델이 있는데, tensorflow로 작성된 것을 pytorch 형태로 공개하고 있다.
# https://github.com/timesler/facenet-pytorch
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training

In [None]:
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import pandas as pd
import os

## Import the Data

우리가 봤었던 MNIST의 경우, 데이터가 이미지인데 excel에 숫자로 적혀 있어서 pandas로 불러와서 이를 torch로 바꿔주었으나,

`featuresTrain = torch.from_numpy(features_train)`

`targetsTrain = torch.from_numpy(target_train).type(torch.LongTensor)`

`train = torch.utils.data.TensorDataset(featuresTrain,targetsTrain)`

여기서는 이미지 파일이기 때문에 torchvision.datasets이라는 내장된 모듈을 사용해 간편하게 불러온다.

그리고 한번 이미지를 align 해야 하는 과정이 필요하다.

`dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((512, 512)))`

#### Define run parameters

The dataset should follow the VGGFace2/ImageNet-style directory layout. Set `data_dir` to the location of the dataset you wish to finetune on.

보통 input data 위치 및 하이퍼파라미터를 설정하는 것인데, 하이퍼 파라미터란 조정하는 수치값을 의미한다.

epoch의 경우 같은 데이터로 학습을 몇 번 돌릴 것인지를 의미한다.

In [None]:
# Location of the training images (ImageFolder layout: one sub-folder per class).
data_dir = './data/train'

# Hyperparameters: samples per batch and number of passes over the training data.
batch_size, epochs = 8, 16

# DataLoader worker processes: none on Windows (os.name == 'nt'), four elsewhere.
if os.name == 'nt':
    workers = 0
else:
    workers = 4

#### Determine if an nvidia GPU is available

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

#### Define MTCNN module

See `help(MTCNN)` for more details.

In [None]:
# MTCNN face detector used to crop faces out of the raw images.
# See `help(MTCNN)` for the meaning of each argument.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    device=device,
)

#### Perform MTCNN facial detection

Iterate through the DataLoader object and obtain cropped faces.

In [None]:
# Load the raw training images, each resized to 512x512.
dataset = datasets.ImageFolder(
    data_dir,
    transform=transforms.Resize((512, 512)),
)
# Replace every sample's class label with the path the cropped face is saved to:
# the same path with `data_dir` swapped for `data_dir + '_aligned'`.
dataset.samples = [
    (src_path, src_path.replace(data_dir, data_dir + '_aligned'))
    for src_path, _label in dataset.samples
]

# collate_pil is used as the collate function because the samples are PIL images.
loader = DataLoader(
    dataset,
    collate_fn=training.collate_pil,
    batch_size=batch_size,
    num_workers=workers,
)

# Run detection batch by batch; mtcnn writes each result to its save path.
total_batches = len(loader)
for batch_no, (images, save_paths) in enumerate(loader, start=1):
    mtcnn(images, save_path=save_paths)
    print('\rBatch {} of {}'.format(batch_no, total_batches), end='')

# Remove mtcnn to reduce GPU memory usage
del mtcnn

#### Define Inception Resnet V1 module

See `help(InceptionResnetV1)` for more details.

In [None]:
# Inception-ResNet-V1 with the pretrained 'vggface2' weights, moved to the selected device.
# NOTE(review): with classify=False the model presumably returns embeddings rather than
# class logits, yet the training cell feeds its output to CrossEntropyLoss — confirm
# whether classify=True / num_classes was intended for the fine-tuning step.
resnet = InceptionResnetV1(
    classify=False,
    pretrained='vggface2'
).to(device)

#### Define optimizer, scheduler, dataset, and dataloader

In [None]:
# Adam over all network parameters; MultiStepLR lowers the learning rate at
# milestones 5 and 10.
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, [5, 10])

# Per-image preprocessing: float32 array -> tensor -> facenet's fixed standardization.
trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization
])

# The aligned (cropped) faces written by the MTCNN step.
dataset = datasets.ImageFolder(data_dir + '_aligned', transform=trans)

# Random 80/20 split of the sample indices into training and validation subsets.
img_inds = np.arange(len(dataset))
np.random.shuffle(img_inds)
train_inds = img_inds[:int(0.8 * len(img_inds))]
val_inds = img_inds[int(0.8 * len(img_inds)):]

train_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
)
# The training cell evaluates on `val_loader`; without this definition it raises
# NameError and the held-out 20% of the data is never used.
val_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(val_inds)
)

#### Define loss and evaluation functions

In [None]:
# Classification loss applied to the model output during every pass.
loss_fn = torch.nn.CrossEntropyLoss()

# Per-batch metrics reported by training.pass_epoch, keyed by display name.
metrics = dict(
    fps=training.BatchTimer(),
    acc=training.accuracy,
)

#### Train model

In [None]:
# TensorBoard writer. `iteration` and `interval` are ad-hoc attributes set here
# (presumably read by training.pass_epoch to pace logging — confirm in facenet_pytorch.training).
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

# NOTE(review): `val_loader` is expected to come from the dataloader cell.
# try/finally guarantees the writer is closed (flushing pending events) even when
# training raises or is interrupted from the notebook.
try:
    # Baseline evaluation pass before any fine-tuning.
    print('\n\nInitial')
    print('-' * 10)
    resnet.eval()
    training.pass_epoch(
        resnet, loss_fn, val_loader,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        # Training pass: the optimizer and scheduler are only supplied here.
        resnet.train()
        training.pass_epoch(
            resnet, loss_fn, train_loader, optimizer, scheduler,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

        # Evaluation pass on the validation loader.
        resnet.eval()
        training.pass_epoch(
            resnet, loss_fn, val_loader,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )
finally:
    writer.close()

In [None]:
# Iterate the aligned dataset one sample at a time (DataLoader's default batch size).
loader = DataLoader(dataset)

# Inference only: force eval mode and disable autograd so no graph is kept in memory.
resnet.eval()

X = []  # one model output vector per image
y = []  # matching class indices
with torch.no_grad():
    for x, idx in loader:
        # `.to(device)` instead of `.cuda()` so the cell also runs on CPU-only machines.
        X += resnet(x.to(device)).tolist()
        y += idx.tolist()

In [None]:
# Length of one model output vector, i.e. the number of features per sample.
len(X[0])

In [None]:
from sklearn import svm

In [None]:
# Linear SVM on the model outputs; probability=True enables predict_proba later on.
clf = svm.SVC(
    probability=True,
    kernel='linear',
)
clf.fit(X, y)

#### Test Model

In [None]:
# Re-create the MTCNN detector (the earlier instance was deleted) for the test images.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    device=device,
)

In [None]:
# From here on `data_dir` points at the test images.
data_dir = './data/test'

# Load the raw test images, each resized to 512x512.
dataset = datasets.ImageFolder(
    data_dir,
    transform=transforms.Resize((512, 512)),
)
# Replace every sample's class label with the path the cropped face is saved to.
dataset.samples = [
    (src_path, src_path.replace(data_dir, data_dir + '_aligned'))
    for src_path, _label in dataset.samples
]

loader = DataLoader(
    dataset,
    collate_fn=training.collate_pil,
    batch_size=batch_size,
    num_workers=workers,
)

# Run detection batch by batch; mtcnn writes each result to its save path.
total_batches = len(loader)
for batch_no, (images, save_paths) in enumerate(loader, start=1):
    mtcnn(images, save_path=save_paths)
    print('\rBatch {} of {}'.format(batch_no, total_batches), end='')

# Remove mtcnn to reduce GPU memory usage
del mtcnn

In [None]:
# Alternative augmentation pipeline, kept for reference but disabled:
# trans = transforms.Compose([
#     transforms.RandomHorizontalFlip(),
#     transforms.ToTensor(),
#     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
#     transforms.RandomErasing(),
# ])

# Active preprocessing: float32 array -> tensor -> facenet's fixed standardization.
trans = transforms.Compose([np.float32, transforms.ToTensor(), fixed_image_standardization])

# The aligned (cropped) faces written by the MTCNN step for the current `data_dir`.
dataset = datasets.ImageFolder(
    data_dir + '_aligned',
    transform=trans,
)

In [None]:
# Iterate the aligned dataset one sample at a time (DataLoader's default batch size).
loader = DataLoader(dataset)

# Inference only: force eval mode and disable autograd so no graph is kept in memory.
resnet.eval()

X = []  # one model output vector per image
y = []  # matching class indices
with torch.no_grad():
    for x, idx in loader:
        # `.to(device)` instead of `.cuda()` so the cell also runs on CPU-only machines.
        X += resnet(x.to(device)).tolist()
        y += idx.tolist()

In [None]:
# Display the current dataset object's summary.
dataset

In [None]:
# For every sample, print the most probable class index followed by all class probabilities.
for class_probs in clf.predict_proba(X):
    top_class = np.argmax(class_probs)
    print(top_class, class_probs)

In [None]:
# Number of labels collected, one per sample iterated.
len(y)

#### Make Classification Model with SVM

In [None]:
# Loader over the current `dataset` with DataLoader's default settings.
# NOTE(review): when the notebook runs top to bottom, `dataset` is the aligned *test* set
# at this point, so the SVM fitted from this loader would be trained on test images —
# confirm this is intended.
data_loader = DataLoader(dataset)

In [None]:
# Inference only: force eval mode and disable autograd so no graph is kept in memory.
resnet.eval()

X = []  # one model output vector per image
y = []  # flat list of class indices, shape (n_samples,)
with torch.no_grad():
    for images, labels in data_loader:
        # `.to(device)` instead of `.cuda()` so the cell also runs on CPU-only machines.
        X += resnet(images.to(device)).tolist()
        # Extend rather than append: appending the per-batch list produced a nested
        # column-vector `y`, unlike the other embedding cells. The labels also never
        # need to be moved to the GPU.
        y += labels.tolist()

In [None]:
from sklearn import svm

In [None]:
# Linear SVM fitted on the collected features; probability=True enables predict_proba.
clf = svm.SVC(
    probability=True,
    kernel='linear',
)
clf.fit(X, y)

In [6]:
# MTCNN detector for the per-image detection loop; see `help(MTCNN)` for the arguments.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    device=device,
)

In [None]:
def collate_fn(x):
    """Return the first element of the batch `x` unchanged.

    Used with a batch-size-1 DataLoader so each iteration yields the raw
    sample instead of a collated batch.
    """
    first_sample = x[0]
    return first_sample

# Raw (un-aligned) test images, loaded as PIL images without any transform.
dataset = datasets.ImageFolder('./data/test')
# Reverse lookup: class index -> class (folder) name.
dataset.idx_to_class = {
    index: class_name for class_name, index in dataset.class_to_idx.items()
}
# One sample per iteration; collate_fn unwraps the single-element batch.
loader = DataLoader(
    dataset,
    num_workers=workers,
    collate_fn=collate_fn,
)

In [None]:
aligned = []  # cropped face tensors, one per image in which a face was found
names = []    # class name of each kept image, in the same order as `aligned`
for image, label in loader:
    x_aligned, prob = mtcnn(image, return_prob=True)
    # Skip images in which no face was detected.
    if x_aligned is None:
        continue
    print('Face detected with probability: {:8f}'.format(prob))
    aligned.append(x_aligned)
    names.append(dataset.idx_to_class[label])

In [None]:
# Check the container type of `aligned` before stacking it into one tensor.
type(aligned)

In [None]:
# Stack the cropped faces into one batch tensor on the selected device.
aligned = torch.stack(aligned).to(device)
# The batch is already on `device`; the former extra `.cuda()` only broke CPU-only runs.
# no_grad avoids building an autograd graph for this inference-only forward pass.
with torch.no_grad():
    embeddings = resnet(aligned).cpu()

In [None]:
# Length of the first model output vector.
len(embeddings.tolist()[0])

In [None]:
# Class probabilities predicted by the SVM for every detected face.
probabilities = clf.predict_proba(embeddings.tolist())
print(probabilities)
# Names of the detected faces (same order) and the index -> class-name mapping.
print(names)
print(dataset.idx_to_class)

In [None]:
# Display the model object.
resnet

In [21]:
from __future__ import print_function, division

from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
import pickle
import os

import warnings
# Silence all warnings from here on.
warnings.filterwarnings("ignore")

# DataLoader worker processes: none on Windows (os.name == 'nt'), four elsewhere.
if os.name == 'nt':
    workers = 0
else:
    workers = 4

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

def collate_fn(x):
    """Return the first element of the batch `x` unchanged.

    Used with a batch-size-1 DataLoader so each iteration yields the raw
    sample instead of a collated batch.
    """
    first_sample = x[0]
    return first_sample

# Random augmentations applied to each image as it is loaded.
trans = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomGrayscale(p=0.6),
])

# Raw training images with the augmentations above.
dataset = datasets.ImageFolder("./data/train", transform=trans)
# Reverse lookup: class index -> class (folder) name.
dataset.idx_to_class = {
    index: class_name for class_name, index in dataset.class_to_idx.items()
}
# One sample per iteration; collate_fn unwraps the single-element batch.
loader = DataLoader(
    dataset,
    num_workers=workers,
    collate_fn=collate_fn,
)

In [22]:
# Print every loaded image object; the label is not needed here.
for image, _label in loader:
    print(image)

<PIL.Image.Image image mode=RGB size=490x489 at 0x2187205F648>
<PIL.Image.Image image mode=RGB size=500x501 at 0x2189D62EB48>
<PIL.Image.Image image mode=RGB size=619x713 at 0x2187205F908>
<PIL.Image.Image image mode=RGB size=407x355 at 0x2189D62EB48>
<PIL.Image.Image image mode=RGB size=455x484 at 0x2189D62E448>
<PIL.Image.Image image mode=RGB size=444x481 at 0x2187205FE88>
<PIL.Image.Image image mode=RGB size=511x607 at 0x2189D62EA48>
<PIL.Image.Image image mode=RGB size=438x534 at 0x2189D62EB48>
<PIL.Image.Image image mode=RGB size=446x588 at 0x2187205F908>
<PIL.Image.Image image mode=RGB size=450x513 at 0x2189D62E348>
<PIL.Image.Image image mode=RGB size=640x480 at 0x2187205F488>
<PIL.Image.Image image mode=RGB size=434x512 at 0x2189D62EF08>
<PIL.Image.Image image mode=RGB size=353x505 at 0x2189D62E348>
<PIL.Image.Image image mode=RGB size=382x409 at 0x2189D62EA48>
<PIL.Image.Image image mode=RGB size=390x569 at 0x2189D62EF08>
<PIL.Image.Image image mode=RGB size=550x702 at 0x21872

In [None]:
# Pretrained model in eval mode on the selected device.
# NOTE(review): classify=True presumably makes the network return class logits instead of
# embeddings, whereas the SVM earlier in this notebook was fit on classify=False output —
# confirm this matches what the pickled classifier was trained on.
resnet = InceptionResnetV1(pretrained='vggface2', classify=True).eval().to(device)

In [None]:
# Detect and crop the face in a single image.
# NOTE(review): `image` is not defined in the visible cells — it must be loaded beforehand.
aligned, prob = mtcnn(image, return_prob=True)
# Test the value just returned. The previous check used `x_aligned`, a stale variable
# left over from an earlier loop, so it did not reflect this detection.
if aligned is not None:
    print('Face detected with probability: {:8f}'.format(prob))

In [None]:
# Wrap the single cropped face into a batch of one and move it to the selected device.
aligned = torch.stack((aligned,)).to(device)

# Forward pass, then detach the result from autograd and bring it back to the CPU.
model_output = resnet(aligned)
embeddings = model_output.detach().cpu()

In [None]:
# Display the computed model output tensor.
embeddings

In [None]:
# SECURITY NOTE: pickle.load can execute arbitrary code — only load a classifier.pkl
# that you created yourself.
# The pickle holds a (classifier, class names) pair.
with open("classifier.pkl", 'rb') as infile:
    model, names = pickle.load(infile)

# Class probabilities for the model output computed for the detected face.
result = model.predict_proba(embeddings)

In [None]:
# Per-class probabilities for the first sample.
result[0]

In [None]:
# Highest class probability for the first sample.
max(result[0])

In [None]:
# Look up the name at the index with the highest predicted probability.
best_index = np.argmax(result[0])
print(names[best_index])

In [27]:
# List the entries of the sibling project's test-data directory.
os.listdir("../face-login-server/data/test")

[]