# Face Identification

안면 인식은 크게 Face Detection(이미지에서 얼굴의 위치를 찾는 것)과 Face Identification(찾은 얼굴이 누구인지 식별하는 것)으로 나뉜다. 둘은 비슷해 보이지만 서로 다른 문제이다.

## Import Libraries

현재 이 코드가 적힌 파일 위치에 facenet_pytorch이라는 폴더가 있고, 이를 모듈처럼 불러와 사용하는데,

이것도 `__init__.py`라는 파일이 있어야 이렇게 모듈처럼 불러올 수 있고, 동일 폴더 내에 있어야 하는 등 여러 조건이 있다.

In [1]:
# 구글이 공개한 facenet이라는 AI 모델이 있는데, tensorflow로 작성된 것을 pytorch 형태로 공개하고 있다.
# https://github.com/timesler/facenet-pytorch
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training

In [2]:
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import pandas as pd
import os

## Import the Data

우리가 봤었던 MNIST의 경우, 데이터가 이미지인데 excel에 숫자로 적혀 있어서 pandas로 불러와서 이를 torch로 바꿔주었으나,

`featuresTrain = torch.from_numpy(features_train)`

`targetsTrain = torch.from_numpy(target_train).type(torch.LongTensor)`

`train = torch.utils.data.TensorDataset(featuresTrain,targetsTrain)`

여기서는 이미지 파일이기 때문에 torchvision.datasets이라는 내장된 모듈을 사용해 간편하게 불러온다.

그리고 한번 이미지를 align 해야 하는 과정이 필요하다.

`dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((512, 512)))`

#### Define run parameters

The dataset should follow the VGGFace2/ImageNet-style directory layout. Modify `data_dir` to the location of the dataset you wish to finetune on.

이 셀에서는 input data의 위치와 하이퍼파라미터를 설정한다. 하이퍼파라미터란 모델이 학습으로 얻는 값이 아니라, 학습 전에 사람이 직접 정해 주는 설정값(batch size, epoch 수 등)을 의미한다.

epoch의 경우 같은 데이터로 학습을 몇 번 돌릴 것인지를 의미한다.

In [3]:
# Run configuration for the training images.
data_dir = './data/train'

# Hyperparameters: samples per batch and number of passes over the data.
batch_size, epochs = 8, 16

# DataLoader worker processes: none on Windows (os.name == 'nt'), four elsewhere.
if os.name == 'nt':
    workers = 0
else:
    workers = 4

#### Determine if an nvidia GPU is available

In [4]:
# Use the first CUDA GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


#### Define MTCNN module

See `help(MTCNN)` for more details.

In [5]:
# MTCNN instance bound to the selected device. Every setting is passed as a
# keyword argument; see help(MTCNN) for what each one controls.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    device=device,
)

In [6]:
# Load InceptionResnetV1 with the 'vggface2' pretrained weights, then switch it
# to eval mode and move it to the selected device.
# NOTE(review): classify=True is requested here, and the recorded output of this
# model further down is 8631 values per face - confirm that this output, rather
# than the model's embedding output, is what should be fed to the SVM.
resnet = InceptionResnetV1(pretrained='vggface2', classify=True)
resnet = resnet.eval().to(device)

In [7]:
def collate_fn(x):
    """Return ``x[0]``, the first item of the collected batch, unchanged."""
    first_item = x[0]
    return first_item

# ImageFolder dataset rooted at data_dir (no transform is applied).
dataset = datasets.ImageFolder(data_dir)
# Reverse lookup (class index -> class name), derived from class_to_idx.
dataset.idx_to_class = dict(
    zip(dataset.class_to_idx.values(), dataset.class_to_idx.keys())
)
# No batch_size is given, so the loader's default applies; collate_fn then
# hands back the first item of each collected batch.
loader = DataLoader(dataset, num_workers=workers, collate_fn=collate_fn)

In [8]:
# Run every image through mtcnn, keeping the returned crop and the class name
# for each image where a crop came back.
aligned, names = [], []
for img, label in loader:
    face, prob = mtcnn(img, return_prob=True)
    if face is None:
        # mtcnn returned no crop for this image; skip it.
        continue
    print('Face detected with probability: {:8f}'.format(prob))
    aligned.append(face)
    names.append(dataset.idx_to_class[label])

Face detected with probability: 0.998145
Face detected with probability: 0.999908
Face detected with probability: 0.999996
Face detected with probability: 0.998552
Face detected with probability: 0.993009
Face detected with probability: 0.999828
Face detected with probability: 0.999999
Face detected with probability: 0.999686
Face detected with probability: 0.999639
Face detected with probability: 0.999997
Face detected with probability: 0.999999
Face detected with probability: 0.999991
Face detected with probability: 0.999993
Face detected with probability: 0.999198
Face detected with probability: 0.999426
Face detected with probability: 0.999996
Face detected with probability: 0.999999
Face detected with probability: 0.999957
Face detected with probability: 0.999995
Face detected with probability: 0.999917
Face detected with probability: 0.999325
Face detected with probability: 0.999985
Face detected with probability: 0.998141
Face detected with probability: 0.999952
Face detected wi

In [9]:
# Stack the per-face crops into a single batch tensor on the target device.
aligned = torch.stack(aligned).to(device)
# Inference only: run the forward pass under no_grad so autograd does not
# record a graph (and keep activations alive) for the whole batch. The result
# is moved back to the CPU for the pandas / scikit-learn steps that follow.
with torch.no_grad():
    embeddings = resnet(aligned).cpu()

In [10]:
# Show the shape of the model output as the cell result.
embeddings.size()

torch.Size([50, 8631])

In [11]:
# Pairwise distance table: entry [i][j] is the norm of the difference between
# output vectors i and j. Rows and columns are labelled with the class names.
dists = []
for anchor in embeddings:
    row = []
    for other in embeddings:
        row.append((anchor - other).norm().item())
    dists.append(row)
print(pd.DataFrame(dists, index=names, columns=names))

                   IU          IU          IU          IU          IU  \
IU           0.000000  107.882195  116.826225  109.790085  127.843582   
IU         107.882195    0.000000  101.853210   84.961960  129.613327   
IU         116.826225  101.853210    0.000000  100.785233  114.849182   
IU         109.790085   84.961960  100.785233    0.000000  126.902893   
IU         127.843582  129.613327  114.849182  126.902893    0.000000   
chanyoung  273.037567  240.848907  248.341812  265.641998  263.893494   
chanyoung  256.509705  234.328918  237.744766  254.580994  247.749664   
chanyoung  245.370026  225.247025  234.826843  236.462616  224.891830   
chanyoung  257.587891  230.464661  245.854233  244.322739  230.348587   
chanyoung  240.291946  236.347443  252.300751  236.352371  233.436066   
eunwoo     251.081299  253.120438  258.984436  262.336182  252.492950   
eunwoo     242.172272  241.057724  255.562332  245.530899  242.004196   
eunwoo     215.473160  221.107590  230.559464  232.

[50 rows x 50 columns]


In [12]:
# The length computed on the next line is discarded: a notebook cell only
# displays the value of its last expression, which here is `names`.
len(embeddings.tolist())
names
# dataset.class_to_idx

['IU',
 'IU',
 'IU',
 'IU',
 'IU',
 'chanyoung',
 'chanyoung',
 'chanyoung',
 'chanyoung',
 'chanyoung',
 'eunwoo',
 'eunwoo',
 'eunwoo',
 'eunwoo',
 'eunwoo',
 'gaeri',
 'gaeri',
 'gaeri',
 'gaeri',
 'gaeri',
 'hayoung',
 'hayoung',
 'hayoung',
 'hayoung',
 'hayoung',
 'jaewon',
 'jaewon',
 'jaewon',
 'jaewon',
 'jaewon',
 'jonggook',
 'jonggook',
 'jonggook',
 'jonggook',
 'jonggook',
 'min',
 'min',
 'min',
 'min',
 'min',
 'soohyang',
 'soohyang',
 'soohyang',
 'soohyang',
 'soohyang',
 'younha',
 'younha',
 'younha',
 'younha',
 'younha']

In [13]:
# Translate each collected class name back into its integer class index.
idxs = [dataset.class_to_idx[name] for name in names]

In [14]:
from sklearn import svm

In [15]:
# Linear-kernel SVM with probability estimates enabled, fitted on the model
# outputs (converted to nested Python lists) against the integer class ids.
clf = svm.SVC(probability=True, kernel='linear')
train_features = embeddings.tolist()
clf.fit(train_features, idxs)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=True, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

### Test

In [16]:
# Run configuration for the test images (data_dir is re-pointed from here on).
data_dir = './data/test'

# Hyperparameters: samples per batch and number of passes over the data.
batch_size, epochs = 8, 16

# DataLoader worker processes: none on Windows (os.name == 'nt'), four elsewhere.
if os.name == 'nt':
    workers = 0
else:
    workers = 4

In [17]:
# Fresh MTCNN instance for the test pass, built with keyword arguments only;
# see help(MTCNN) for what each one controls.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    device=device,
)

In [18]:
# Reload InceptionResnetV1 with the 'vggface2' pretrained weights in eval mode
# on the selected device (classify=True, as requested by the keyword below).
resnet = InceptionResnetV1(pretrained='vggface2', classify=True)
resnet = resnet.eval().to(device)

In [19]:
def collate_fn(x):
    """Return ``x[0]``, the first item of the collected batch, unchanged."""
    first_item = x[0]
    return first_item

# ImageFolder dataset rooted at data_dir (no transform is applied).
dataset = datasets.ImageFolder(data_dir)
# Reverse lookup (class index -> class name), derived from class_to_idx.
dataset.idx_to_class = dict(
    zip(dataset.class_to_idx.values(), dataset.class_to_idx.keys())
)
# No batch_size is given, so the loader's default applies; collate_fn then
# hands back the first item of each collected batch.
loader = DataLoader(dataset, num_workers=workers, collate_fn=collate_fn)

In [20]:
# Run every test image through mtcnn, keeping the returned crop and the class
# name for each image where a crop came back.
aligned, names = [], []
for img, label in loader:
    face, prob = mtcnn(img, return_prob=True)
    if face is None:
        # mtcnn returned no crop for this image; skip it.
        continue
    aligned.append(face)
    names.append(dataset.idx_to_class[label])

In [21]:
# Stack the per-face crops into a single batch tensor on the target device.
aligned = torch.stack(aligned).to(device)
# Inference only: run the forward pass under no_grad so autograd does not
# record a graph (and keep activations alive) for the whole batch. The result
# is moved back to the CPU before it is handed to the classifier.
with torch.no_grad():
    embeddings = resnet(aligned).cpu()

In [22]:
# predict_proba returns one row of per-class probabilities per face, with the
# columns ordered like clf.classes_. Map the arg-max column back through
# clf.classes_ so the printed value is the actual class id rather than a column
# position (the two differ whenever a class id is missing from the training
# labels, e.g. because no face was found in any of that class's images).
for probs in clf.predict_proba(embeddings.tolist()):
    best_col = np.argmax(probs)
    print(max(probs))
    print(clf.classes_[best_col])

0.3700292943130865
0
0.25953407422929314
0
0.4593012523801784
1
0.4175110310458948
1
0.3905814114825507
2
0.40632669030914936
2
0.492342347191165
3
0.48437317038974836
3
0.4963243501112441
4
0.4949887051732227
4
0.4251502244137099
5
0.33385957680198386
5
0.4594991284899076
6
0.5031112486817445
6
0.36392488109676113
7
0.45448238090462456
7
0.31081664712768126
8
0.40856844596082215
8
0.18158578095254413
0
0.21434905549000105
0


#### Perform MTCNN facial detection

Iterate through the DataLoader object and obtain cropped faces.

In [None]:
# Crop the faces found in every image under data_dir and save them under
# data_dir + '_aligned'.
aligned_dir = data_dir + '_aligned'
resize_512 = transforms.Resize((512, 512))
dataset = datasets.ImageFolder(data_dir, transform=resize_512)

# Replace each sample's label with the output path for its crop, so that the
# loader yields (image, save_path) pairs instead of (image, class index).
dataset.samples = [
    (src_path, src_path.replace(data_dir, aligned_dir))
    for src_path, _ in dataset.samples
]

loader = DataLoader(
    dataset,
    batch_size=batch_size,
    num_workers=workers,
    collate_fn=training.collate_pil,
)

total_batches = len(loader)
for batch_no, (x, y) in enumerate(loader, start=1):
    # The save paths travel through the loader in the label position (y).
    mtcnn(x, save_path=y)
    print('\rBatch {} of {}'.format(batch_no, total_batches), end='')

# Remove mtcnn to reduce GPU memory usage
del mtcnn

#### Define Inception Resnet V1 module

See `help(InceptionResnetV1)` for more details.

In [None]:
# InceptionResnetV1 with the 'vggface2' pretrained weights, moved to the
# selected device. Unlike the earlier cells it is not switched to eval mode
# here and classify is turned off.
# NOTE(review): this model is later optimised with CrossEntropyLoss; with
# classify=False it presumably returns embeddings rather than class logits -
# confirm that is intended.
resnet = InceptionResnetV1(pretrained='vggface2', classify=False).to(device)

In [None]:
# Number of classes in the dataset (size of the class-name -> index mapping).
len(dataset.class_to_idx)

#### Define optimizer, scheduler, dataset, and dataloader

In [None]:
# Adam over all model parameters, with a MultiStepLR schedule whose milestones
# are 5 and 10.
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, milestones=[5, 10])

# Alternative (disabled) augmentation pipeline:
# trans = transforms.Compose([
#     transforms.RandomHorizontalFlip(),
#     transforms.ToTensor(),
#     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
#     transforms.RandomErasing(),
# ])

# Active preprocessing: cast to float32, convert to a tensor, then apply
# facenet_pytorch's fixed_image_standardization.
preprocessing_steps = [
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization,
]
trans = transforms.Compose(preprocessing_steps)

# Load the crops written under data_dir + '_aligned'.
dataset = datasets.ImageFolder(data_dir + '_aligned', transform=trans)

# Random 80/20 split of the sample indices into training and validation parts.
img_inds = np.arange(len(dataset))
np.random.shuffle(img_inds)
split_at = int(0.8 * len(img_inds))
train_inds, val_inds = img_inds[:split_at], img_inds[split_at:]

# Both loaders read the same dataset and differ only in the index subset they
# sample from.
loader_options = dict(num_workers=workers, batch_size=batch_size)
train_loader = DataLoader(
    dataset, sampler=SubsetRandomSampler(train_inds), **loader_options
)
val_loader = DataLoader(
    dataset, sampler=SubsetRandomSampler(val_inds), **loader_options
)

#### Define loss and evaluation functions

In [None]:
# Cross-entropy loss, plus the per-batch metrics passed to training.pass_epoch
# under the names 'fps' and 'acc'.
loss_fn = torch.nn.CrossEntropyLoss()
metrics = dict(fps=training.BatchTimer(), acc=training.accuracy)

#### Train model

In [None]:
# TensorBoard writer. `iteration` and `interval` are extra attributes attached
# to it here (presumably read by training.pass_epoch - TODO confirm).
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

# Keyword arguments shared by every pass_epoch call below.
epoch_kwargs = dict(
    batch_metrics=metrics, show_running=True, device=device, writer=writer
)

# try/finally guarantees the writer is closed even when a pass raises or the
# run is interrupted; previously close() was skipped in that case.
try:
    # Baseline: one evaluation pass over the validation loader before training.
    print('\n\nInitial')
    print('-' * 10)
    resnet.eval()
    training.pass_epoch(resnet, loss_fn, val_loader, **epoch_kwargs)

    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        # Training pass: the optimizer and scheduler are supplied only here.
        resnet.train()
        training.pass_epoch(
            resnet, loss_fn, train_loader, optimizer, scheduler, **epoch_kwargs
        )

        # Validation pass in eval mode, without optimizer or scheduler.
        resnet.eval()
        training.pass_epoch(resnet, loss_fn, val_loader, **epoch_kwargs)
finally:
    writer.close()

In [None]:
# Collect one model output vector (X) and one class index (y) per image.
loader = DataLoader(dataset)

X = []
y = []
# Feature extraction is inference: make sure the model is in eval mode and do
# not let autograd record a graph for every forward pass.
resnet.eval()
with torch.no_grad():
    for x, idx in loader:
        # Send the batch to the same `device` the model was moved to, instead of
        # a hard-coded .cuda() that fails on CPU-only machines.
        X += resnet(x.to(device)).tolist()
        y += idx.tolist()

In [None]:
# Length of the first feature vector in X, i.e. how many values resnet
# produced for one image.
len(X[0])

In [None]:
from sklearn import svm

In [None]:
# Linear-kernel SVM with probability estimates enabled, fitted on the collected
# feature vectors X against the class indices y.
clf = svm.SVC(probability=True, kernel='linear')
clf.fit(X, y)

#### Test Model

In [None]:
# Re-create MTCNN (the earlier instance was deleted after the crop pass).
# Every setting is passed as a keyword argument; see help(MTCNN).
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    device=device,
)

In [None]:
# Crop the faces found in every test image and save them under
# data_dir + '_aligned'.
data_dir = './data/test'
aligned_dir = data_dir + '_aligned'

resize_512 = transforms.Resize((512, 512))
dataset = datasets.ImageFolder(data_dir, transform=resize_512)

# Replace each sample's label with the output path for its crop, so that the
# loader yields (image, save_path) pairs instead of (image, class index).
dataset.samples = [
    (src_path, src_path.replace(data_dir, aligned_dir))
    for src_path, _ in dataset.samples
]

loader = DataLoader(
    dataset,
    batch_size=batch_size,
    num_workers=workers,
    collate_fn=training.collate_pil,
)

total_batches = len(loader)
for batch_no, (x, y) in enumerate(loader, start=1):
    # The save paths travel through the loader in the label position (y).
    mtcnn(x, save_path=y)
    print('\rBatch {} of {}'.format(batch_no, total_batches), end='')

# Remove mtcnn to reduce GPU memory usage
del mtcnn

In [None]:
# Alternative (disabled) augmentation pipeline:
# trans = transforms.Compose([
#     transforms.RandomHorizontalFlip(),
#     transforms.ToTensor(),
#     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
#     transforms.RandomErasing(),
# ])

# Active preprocessing: cast to float32, convert to a tensor, then apply
# facenet_pytorch's fixed_image_standardization.
preprocessing_steps = [
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization,
]
trans = transforms.Compose(preprocessing_steps)

# Load the crops written under data_dir + '_aligned'.
aligned_root = data_dir + '_aligned'
dataset = datasets.ImageFolder(aligned_root, transform=trans)

In [None]:
# Collect one model output vector (X) and one class index (y) per image.
loader = DataLoader(dataset)

X = []
y = []
# Feature extraction is inference: make sure the model is in eval mode and do
# not let autograd record a graph for every forward pass.
resnet.eval()
with torch.no_grad():
    for x, idx in loader:
        # Send the batch to the same `device` the model was moved to, instead of
        # a hard-coded .cuda() that fails on CPU-only machines.
        X += resnet(x.to(device)).tolist()
        y += idx.tolist()

In [None]:
# Display the dataset object's repr as the cell output.
dataset

In [None]:
# For every feature vector, print the position of its most probable class
# followed by the full row of per-class probabilities.
class_probs = clf.predict_proba(X)
for row in class_probs:
    print(np.argmax(row), row)

In [None]:
# Number of collected labels.
len(y)

#### Make Classification Model with SVM

In [None]:
# DataLoader over `dataset` with all-default settings (no batch_size, sampler
# or collate_fn is given).
data_loader = DataLoader(dataset)

In [None]:
# Collect one model output vector (X) and one class index (y) per image.
X = []
y = []
# Inference only, so do not let autograd record a graph for each forward pass.
with torch.no_grad():
    for batch_images, batch_labels in data_loader:
        # Use the shared `device` rather than a hard-coded .cuda(), which fails
        # on CPU-only machines.
        X += resnet(batch_images.to(device)).tolist()
        # Extend y with plain ints. The previous append() of a one-element list
        # produced a list of lists (a column vector) as the SVM target, and the
        # labels were copied to the GPU only to be converted straight back.
        y += batch_labels.tolist()

In [None]:
from sklearn import svm

In [None]:
# Linear-kernel SVM with probability estimates enabled, fitted on the collected
# feature vectors X against the labels y.
clf = svm.SVC(probability=True, kernel='linear')
clf.fit(X, y)

In [None]:
# MTCNN instance bound to the selected device. Every setting is passed as a
# keyword argument; see help(MTCNN) for what each one controls.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    device=device,
)

In [None]:
def collate_fn(x):
    """Return ``x[0]``, the first item of the collected batch, unchanged."""
    first_item = x[0]
    return first_item

# ImageFolder dataset over the raw test images (no transform is applied).
dataset = datasets.ImageFolder('./data/test')
# Reverse lookup (class index -> class name), derived from class_to_idx.
dataset.idx_to_class = dict(
    zip(dataset.class_to_idx.values(), dataset.class_to_idx.keys())
)
# No batch_size is given, so the loader's default applies; collate_fn then
# hands back the first item of each collected batch.
loader = DataLoader(dataset, num_workers=workers, collate_fn=collate_fn)

In [None]:
# Run every test image through mtcnn, keeping the returned crop and the class
# name for each image where a crop came back.
aligned, names = [], []
for img, label in loader:
    face, prob = mtcnn(img, return_prob=True)
    if face is None:
        # mtcnn returned no crop for this image; skip it.
        continue
    print('Face detected with probability: {:8f}'.format(prob))
    aligned.append(face)
    names.append(dataset.idx_to_class[label])

In [None]:
# Inspect the container type of `aligned` (it is still the Python list built
# by the loop above; it is stacked into a tensor in the following cell).
type(aligned)

In [None]:
# Stack the per-face crops into a single batch tensor on the target device.
aligned = torch.stack(aligned).to(device)
# The batch is already on `device`, so the extra hard-coded .cuda() is dropped
# (it was redundant on GPU and an error on CPU-only machines). The forward pass
# runs under no_grad because this is inference only.
with torch.no_grad():
    embeddings = resnet(aligned).cpu()

In [None]:
# Length of the first output vector, i.e. how many values resnet produced
# per face.
len(embeddings.tolist()[0])

In [None]:
# Per-class probabilities for every detected test face, followed by the true
# class names and the index -> name mapping needed to read the columns.
test_probs = clf.predict_proba(embeddings.tolist())
print(test_probs)
print(names)
print(dataset.idx_to_class)

In [None]:
# Display the model (its repr) as the cell output.
resnet