# Facial Recognition Classification

In [1]:
import matplotlib
import matplotlib.pyplot as plt

In [2]:
import os
import shutil
from bing_image_downloader.bing_image_downloader import downloader


# Root folders of the train/test splits; dataset_split() adds one
# sub-folder per query (class) beneath each of them.
directory_list = [
    './ryan_group2/train/',
    './ryan_group2/test/',
]

# Create the split roots up front. exist_ok=True makes re-running the cell
# safe without a separate (race-prone) "check, then create" step.
for directory in directory_list:
    os.makedirs(directory, exist_ok=True)

# split one downloaded image folder into the train/test directory trees
def dataset_split(query, train_cnt):
    """Move the files found in the folder ``query`` into the train/test splits.

    The first ``train_cnt`` files (in sorted file-name order) are moved to
    ``./ryan_group2/train/<query>/``; every remaining file is moved to
    ``./ryan_group2/test/<query>/``. The source folder ``query`` is deleted
    once all files have been moved.

    Args:
        query: Path of the folder holding the downloaded files; also used as
            the name of the per-class sub-folder inside each split.
        train_cnt: Number of files to place in the training split.
    """
    # make sure the per-class folder exists under every split root
    for directory in directory_list:
        os.makedirs(os.path.join(directory, query), exist_ok=True)

    train_dir = os.path.join('./ryan_group2/train/', query)
    test_dir = os.path.join('./ryan_group2/test/', query)

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # same files land in the same split on every run
    for cnt, file_name in enumerate(sorted(os.listdir(query))):
        source = os.path.join(query, file_name)
        if cnt < train_cnt:
            print(f'[Train Dataset] {file_name}')
            shutil.move(source, os.path.join(train_dir, file_name))
        else:
            print(f'[Test Dataset] {file_name}')
            shutil.move(source, os.path.join(test_dir, file_name))

    # the source folder is empty now; remove it
    shutil.rmtree(query)

In [3]:
# Hugh Jackman: request up to 80 images, then put the first 60 files into
# the training split and whatever remains into the test split
query = 'Hugh Jackman'
downloader.download(
    query,
    limit=80,
    output_dir='./',
    adult_filter_off=True,
    force_replace=False,
    timeout=60,
)
dataset_split(query, train_cnt=60)

[Info] Indexing page: 1
[Info] Indexed 35 Images on Page 1.
#1 Downloading image (https://www.thescottishsun.co.uk/wp-content/uploads/sites/2/2019/05/NINTCHDBPICT000470738485.jpg)
#1 The file has been successfully downloaded.
#2 Downloading image (https://www.dailypioneer.com/uploads/2019/story/images/big/hugh-jackman-starrer--the-music-man--revival-coming-to-broadway-2019-03-14.jpg)
#2 The file has been successfully downloaded.
#3 Downloading image (https://cdn.vox-cdn.com/thumbor/uWtkwb0qrMrOnVKOgWnCn-VuqLk=/0x0:6196x4131/1200x800/filters:focal(2603x1570:3593x2560)/cdn.vox-cdn.com/uploads/chorus_image/image/64641765/1f3f699511.0.jpeg)
#3 The file has been successfully downloaded.
#4 Downloading image (https://financerewind.com/wp-content/uploads/2020/09/ezgif-6-dba5517a2f83.jpg)
#4 The file has been successfully downloaded.
#5 Downloading image (https://cdn.britannica.com/47/201647-050-C547C128/Hugh-Jackman-2013.jpg)
[Error] Invalid image, not saving https://cdn.britannica.com/47/201

KeyboardInterrupt: 

In [None]:
# Ryan Reynolds: request up to 80 images, then put the first 60 files into
# the training split and whatever remains into the test split
query = 'Ryan Reynolds'
downloader.download(
    query,
    limit=80,
    output_dir='./',
    adult_filter_off=True,
    force_replace=False,
    timeout=60,
)
dataset_split(query, train_cnt=60)

In [None]:
# Jake Gyllenhaal: request up to 80 images, then put the first 60 files into
# the training split and whatever remains into the test split
query = 'Jake Gyllenhaal'
downloader.download(
    query,
    limit=80,
    output_dir='./',
    adult_filter_off=True,
    force_replace=False,
    timeout=60,
)
dataset_split(query, train_cnt=60)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

import torchvision
from torchvision import datasets, models, transforms

import numpy as np
import time


# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# Per-channel normalization constants shared by both pipelines
# (presumably the ImageNet statistics the pretrained weights expect — confirm).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
image_size = (224, 224)

# Training pipeline: resize, random horizontal flip (augmentation),
# tensor conversion, normalization.
transforms_train = transforms.Compose([
    transforms.Resize(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Test pipeline: identical, minus the augmentation step.
transforms_test = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# One ImageFolder per split; class labels come from the sub-folder names.
data_dir = './ryan_group2'
train_datasets = datasets.ImageFolder(os.path.join(data_dir, 'train'), transforms_train)
test_datasets = datasets.ImageFolder(os.path.join(data_dir, 'test'), transforms_test)

# Both loaders use the same batching options.
loader_options = dict(batch_size=32, shuffle=True, num_workers=4)
train_dataloader = torch.utils.data.DataLoader(train_datasets, **loader_options)
test_dataloader = torch.utils.data.DataLoader(test_datasets, **loader_options)

class_names = train_datasets.classes

print('size of training dataset:', len(train_datasets))
print('size of testing dataset:', len(test_datasets))
print('class:', class_names)

In [None]:
def imshow(input, title):
    """Undo the input normalization and display an image tensor.

    Args:
        input: Normalized image tensor in channel-first layout (C, H, W);
            it is transposed to (H, W, C) for plotting. May live on any
            device and may be attached to the autograd graph.
        title: Title passed to ``plt.title``.
    """
    # Tensor.numpy() only works for CPU tensors that do not require grad,
    # so detach and copy to host memory first (both are no-ops for a plain
    # CPU tensor), then move the channel axis last for matplotlib.
    image = input.detach().cpu().numpy().transpose((1, 2, 0))

    # invert Normalize(mean, std): x * std + mean
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image = np.clip(std * image + mean, 0, 1)  # keep values in the displayable [0, 1] range

    # print image
    plt.imshow(image)
    plt.title(title)
    plt.show()


# pull a single batch from the training loader
iterator = iter(train_dataloader)
inputs, classes = next(iterator)

# title the figure with the class name of every image in the batch
batch_titles = [class_names[x] for x in classes]

# tile the batch into one grid image and display it
out = torchvision.utils.make_grid(inputs)
imshow(out, title=batch_titles)

In [None]:
# Start from a ResNet-34 with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=`; kept as-is for compatibility with the installed version.
model = models.resnet34(pretrained=True)
num_features = model.fc.in_features

# transfer learning: replace the final fully connected layer with one that
# has a single output per class found in the training folder, so the head
# matches the data even if the number of downloaded classes changes
model.fc = nn.Linear(num_features, len(class_names))
model = model.to(device)

# every parameter is handed to the optimizer, i.e. the whole network is
# fine-tuned, not only the new head
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [None]:
num_epochs = 15
model.train()  # switch the model to training mode
start_time = time.time()

# repeat the number of epoch
for epoch in range(num_epochs):
    running_loss = 0.
    running_corrects = 0

    # call training data in batches
    for inputs, labels in train_dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # forward pass
        optimizer.zero_grad()
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)  # index of the largest logit = predicted class
        loss = criterion(outputs, labels)

        # backward pass and parameter update
        loss.backward()
        optimizer.step()

        # loss.item() is scaled by the batch size so that dividing by the
        # dataset size below gives a per-sample average
        running_loss += loss.item() * inputs.size(0)
        # accumulate a plain Python int: keeps the counter off the device
        # and makes the accuracy below ordinary float division on every
        # PyTorch version
        running_corrects += torch.sum(preds == labels).item()

    epoch_loss = running_loss / len(train_datasets)
    epoch_acc = running_corrects / len(train_datasets) * 100.

    # print the result while training (time is cumulative since start_time)
    print('#{} Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, epoch_loss, epoch_acc, time.time() - start_time))

In [None]:
# evaluate the model
model.eval()  # switch the model to evaluation mode
start_time = time.time()

# no gradients are needed for evaluation
with torch.no_grad():
    running_loss = 0.
    running_corrects = 0

    for inputs, labels in test_dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)  # index of the largest logit = predicted class
        loss = criterion(outputs, labels)

        # loss.item() is scaled by the batch size so that dividing by the
        # dataset size below gives a per-sample average
        running_loss += loss.item() * inputs.size(0)
        # accumulate a plain Python int so the accuracy below is ordinary
        # float division on every PyTorch version
        running_corrects += torch.sum(preds == labels).item()

        # visualize the first sample of each batch
        print(f'[Prediction: {class_names[preds[0]]}] (Actual: {class_names[labels[0]]})')
        imshow(inputs.cpu()[0], title='Prediction: ' + class_names[preds[0]])

    epoch_loss = running_loss / len(test_datasets)
    epoch_acc = running_corrects / len(test_datasets) * 100.
    print('[Test Phase] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch_loss, epoch_acc, time.time() - start_time))