# Face Identification

안면 인식 관련하여 크게, Face Detection과 Face Identification으로 나뉜다. 둘은 비슷하지만 다르다.

## Import Libraries

현재 이 코드가 적힌 파일 위치에 facenet_pytorch이라는 폴더가 있고, 이를 모듈처럼 불러와 사용하는데,

이것도 `__init__.py`라는 파일이 있어야 이렇게 모듈처럼 불러올 수 있고, 동일 폴더 내에 있어야 하는 등 여러 조건이 있다.

In [1]:
# 구글이 공개한 facenet이라는 AI 모델이 있는데, tensorflow로 작성된 것을 pytorch 형태로 공개하고 있다.
# https://github.com/timesler/facenet-pytorch
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training

In [2]:
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import pandas as pd
import os

## Import the Data

우리가 봤었던 MNIST의 경우, 데이터가 이미지인데 excel에 숫자로 적혀 있어서 pandas로 불러와서 이를 torch로 바꿔주었으나,

`featuresTrain = torch.from_numpy(features_train)`

`targetsTrain = torch.from_numpy(target_train).type(torch.LongTensor)`

`train = torch.utils.data.TensorDataset(featuresTrain,targetsTrain)`

여기서는 이미지 파일이기 때문에 torchvision.datasets이라는 내장된 모듈을 사용해 간편하게 불러온다.

그리고 한번 이미지를 align 해야 하는 과정이 필요하다.

`dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((512, 512)))`

#### Define run parameters

The dataset should follow the VGGFace2/ImageNet-style directory layout. Modify `data_dir` to the location of the dataset on wish to finetune on.

보통 input data 위치 및 하이퍼파라미터를 설정하는 것인데, 하이퍼 파라미터란 조정하는 수치값을 의미한다.

epoch의 경우 같은 데이터로 학습을 몇 번 돌릴 것인지를 의미한다.

In [3]:
data_dir = './data/train'

batch_size = 8
epochs = 16
workers = 0 if os.name == 'nt' else 4

#### Determine if an nvidia GPU is available

In [4]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


#### Define MTCNN module

See `help(MTCNN)` for more details.

In [5]:
mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709,
    device=device
)

#### Perfom MTCNN facial detection

Iterate through the DataLoader object and obtained cropped faces.

In [6]:
dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((512, 512)))
dataset.samples = [
    (p, p.replace(data_dir, data_dir + '_aligned')) for p, _ in dataset.samples
]
        
loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    collate_fn=training.collate_pil
)

for i, (x, y) in enumerate(loader):
    mtcnn(x, save_path=y)
    print('\rBatch {} of {}'.format(i + 1, len(loader)), end='')
    
# Remove mtcnn to reduce GPU memory usage
del mtcnn

Batch 7 of 7

#### Define Inception Resnet V1 module

See `help(InceptionResnetV1)` for more details.

In [7]:
resnet = InceptionResnetV1(
    classify=False,
    pretrained='vggface2'
).to(device)

#### Define optimizer, scheduler, dataset, and dataloader

In [9]:
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, [5, 10])

trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization
])

dataset = datasets.ImageFolder(data_dir + '_aligned', transform=trans)
img_inds = np.arange(len(dataset))
np.random.shuffle(img_inds)
train_inds = img_inds[:int(0.8 * len(img_inds))]

train_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
)

#### Define loss and evaluation functions

In [10]:
loss_fn = torch.nn.CrossEntropyLoss()
metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy
}

#### Train model

In [11]:
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

print('\n\nInitial')
print('-' * 10)
resnet.eval()
training.pass_epoch(
    resnet, loss_fn, val_loader,
    batch_metrics=metrics, show_running=True, device=device,
    writer=writer
)

for epoch in range(epochs):
    print('\nEpoch {}/{}'.format(epoch + 1, epochs))
    print('-' * 10)

    resnet.train()
    training.pass_epoch(
        resnet, loss_fn, train_loader, optimizer, scheduler,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

    resnet.eval()
    training.pass_epoch(
        resnet, loss_fn, val_loader,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

writer.close()



Initial
----------
Valid |     2/2    | loss:    6.2356 | fps:   62.4435 | acc:    0.0000   

Epoch 1/16
----------
Train |     5/5    | loss:    6.2091 | fps:   32.3015 | acc:    0.0000   
Valid |     2/2    | loss:    6.2459 | fps:   81.6830 | acc:    0.0000   

Epoch 2/16
----------
Train |     5/5    | loss:    6.1747 | fps:   34.0322 | acc:    0.0500   
Valid |     2/2    | loss:    6.2643 | fps:   85.5678 | acc:    0.0000   

Epoch 3/16
----------
Train |     5/5    | loss:    6.1530 | fps:   33.1316 | acc:    0.2250   
Valid |     2/2    | loss:    6.2238 | fps:   86.8955 | acc:    0.0000   

Epoch 4/16
----------
Train |     5/5    | loss:    6.1396 | fps:   34.1778 | acc:    0.3500   
Valid |     2/2    | loss:    6.2095 | fps:   86.5058 | acc:    0.0000   

Epoch 5/16
----------
Train |     5/5    | loss:    6.1118 | fps:   34.0431 | acc:    0.3750   
Valid |     2/2    | loss:    6.2091 | fps:   85.9447 | acc:    0.0000   

Epoch 6/16
----------
Train |     5/5    | loss: 

In [12]:
loader = DataLoader(dataset)

X = []
y = []
for x, idx in loader:
    X += resnet(x.cuda()).tolist()
    y += idx.tolist()

In [13]:
len(X[0])

512

In [14]:
from sklearn import svm

In [15]:
clf = svm.SVC(kernel='linear', probability=True)
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=True, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

#### Test Model

In [16]:
mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709,
    device=device
)

In [17]:
data_dir = './data/test'

dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((512, 512)))
dataset.samples = [
    (p, p.replace(data_dir, data_dir + '_aligned')) for p, _ in dataset.samples
]

loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    collate_fn=training.collate_pil
)

for i, (x, y) in enumerate(loader):
    mtcnn(x, save_path=y)
    print('\rBatch {} of {}'.format(i + 1, len(loader)), end='')
    
# Remove mtcnn to reduce GPU memory usage
del mtcnn

Batch 3 of 3

In [18]:
# trans = transforms.Compose([
#     transforms.RandomHorizontalFlip(),
#     transforms.ToTensor(),
#     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
#     transforms.RandomErasing(),
# ])

trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization
])

dataset = datasets.ImageFolder(data_dir + '_aligned', transform=trans)

In [19]:
loader = DataLoader(dataset)

X = []
y = []
for x, idx in loader:
    X += resnet(x.cuda()).tolist()
    y += idx.tolist()

In [20]:
dataset

Dataset ImageFolder
    Number of datapoints: 20
    Root location: ./data/test_aligned
    StandardTransform
Transform: Compose(
               <class 'numpy.float32'>
               ToTensor()
               <function fixed_image_standardization at 0x0000023C37E438B8>
           )

In [21]:
for i in clf.predict_proba(X):
    print(np.argmax(i), i)

0 [0.30055788 0.02490051 0.02573857 0.04260679 0.21815553 0.08728969
 0.03834279 0.04496593 0.0838665  0.1335758 ]
0 [0.31107857 0.02596456 0.02509281 0.04434098 0.21068838 0.09121571
 0.03918821 0.04753018 0.07921664 0.12568396]
2 [0.01183863 0.26863007 0.28355849 0.07211787 0.0258185  0.13409971
 0.03543401 0.07216579 0.04676885 0.04956806]
6 [0.01928681 0.10450515 0.06992751 0.1242891  0.04258471 0.11917343
 0.25670774 0.15940561 0.05165095 0.052469  ]
2 [0.01633533 0.17870752 0.35313681 0.1463165  0.02100618 0.11444328
 0.03133099 0.07277736 0.03777731 0.02816873]
2 [0.03092981 0.07983405 0.25048123 0.07985833 0.07125863 0.12404964
 0.04505636 0.05077464 0.14366291 0.12409438]
3 [0.04549708 0.05258967 0.07284142 0.33017559 0.06539719 0.09841519
 0.08666469 0.08448298 0.06652991 0.09740627]
3 [0.02693715 0.08008128 0.14422778 0.37229591 0.03703972 0.11820099
 0.06892007 0.06253369 0.04945537 0.04030804]
4 [0.15856936 0.03572425 0.0331934  0.03883711 0.29839678 0.09044003
 0.03305128

In [22]:
len(y)

20

#### Make Classification Model with SVM

In [None]:
data_loader = DataLoader(dataset)

In [None]:
X = []
y = []
for i in data_loader:
    X += resnet(i[0].cuda()).tolist()
    y.append(i[1].cuda().tolist())

In [None]:
from sklearn import svm

In [None]:
clf = svm.SVC(kernel='linear', probability=True)
clf.fit(X, y)

In [None]:
mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709,
    device=device
)

In [None]:
def collate_fn(x):
    return x[0]

dataset = datasets.ImageFolder('./data/test')
dataset.idx_to_class = {i:c for c, i in dataset.class_to_idx.items()}
loader = DataLoader(dataset, collate_fn=collate_fn, num_workers=workers)

In [None]:
aligned = []
names = []
for x, y in loader:
    x_aligned, prob = mtcnn(x, return_prob=True)
    if x_aligned is not None:
        print('Face detected with probability: {:8f}'.format(prob))
        aligned.append(x_aligned)
        names.append(dataset.idx_to_class[y])

In [None]:
type(aligned)

In [None]:
aligned = torch.stack(aligned).to(device)
embeddings = resnet(aligned.cuda()).detach().cpu()

In [None]:
len(embeddings.tolist()[0])

In [None]:
print(clf.predict_proba(embeddings.tolist()))
print(names)
print(dataset.idx_to_class)

In [None]:
resnet

In [1]:
from __future__ import print_function, division

from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
import pickle
import os

import warnings
warnings.filterwarnings("ignore")

workers = 0 if os.name == 'nt' else 4

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

def collate_fn(x):
    return x[0]

trans = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    transforms.RandomErasing(),
])

dataset = datasets.ImageFolder("../data/train", transform=trans)
dataset.idx_to_class = {i:c for c, i in dataset.class_to_idx.items()}
loader = DataLoader(dataset, collate_fn=collate_fn, num_workers=workers)

In [4]:
for x in loader:
    print(x)
    

TypeError: tensor should be a torch tensor. Got <class 'PIL.Image.Image'>.