In [0]:
!pip install facenet_pytorch
!pip install pillow==6.2.1



In [0]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
!rm -rf /content/drive/My\ Drive/celebrity-face-recognition/data

In [0]:
!cp /content/drive/My\ Drive/celebrity-face-recognition ./ -r

In [0]:
%cd celebrity-face-recognition/

/content/celebrity-face-recognition


In [0]:
!rm -r data
!mkdir data
!mkdir data/raw
!mkdir data/processed

In [0]:
!cp -r -t data/raw/ downloads/*

In [0]:
import os


def walkdir(folder):
    """Walk through each files in a directory"""
    for dirpath, dirs, files in os.walk(folder):
        for filename in files:
            yield os.path.abspath(os.path.join(dirpath, filename))



In [0]:
celeb_names = os.listdir('data/raw')

In [0]:
import torch


class Config:
    def __init__(self):
        self.epoch = 200
        self.train_patience = 10

        self.batch_size = 32
        self.valid_ratio = 0.2
        self.test_ratio = 0.2

        self.lr = 0.0001

        self.seed = 420
        self.device = torch.device('cuda:0')
        
        self.num_workers = 1
        self.pin_memory = True
    
        # config for mtcnn

        self.image_size = 160
        self.margin = 0
        self.min_face_size = 20
        self.threshold = [0.6, 0.7, 0.7]
        self.factor = 0.709
        self.prewhiten = True


In [0]:
import os
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import numpy as np

from control import RAW_DATA_PATH, PROCESSED_DATA_PATH


class CropFaceDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

    def __len__(self):
        return len(X)


def get_dataloader(batch_size: int,
                   test_ratio=0.2,
                   valid_ratio=0.2,
                   random_state=420,
                   shuffle=True,
                   num_workers=4,
                   pin_memory=False):
    src_folder = os.path.join(PROCESSED_DATA_PATH, 'cropped')

    transforms_img = transforms.Compose([transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
                                         transforms.RandomRotation(degrees=15),
                                         transforms.ColorJitter(),
                                         transforms.RandomHorizontalFlip(),
                                         transforms.CenterCrop(size=224),  # Image net standards
                                         transforms.ToTensor(),
                                         transforms.Normalize([0.485, 0.456, 0.406],
                                                              [0.229, 0.224, 0.225])
                                         ])
    dataset = datasets.ImageFolder(src_folder, transform=transforms_img)

    X = [i for i, j in dataset.imgs]
    y = [j for i, j in dataset.imgs]

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=test_ratio,
                                                        random_state=random_state,
                                                        shuffle=shuffle,
                                                        stratify=y)

    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train,
                                                          test_size=valid_ratio,
                                                          random_state=random_state,
                                                          shuffle=shuffle,
                                                          stratify=y_train)

    train_set = CropFaceDataset(X_train, y_train)
    valid_set = CropFaceDataset(X_valid, y_valid)
    test_set = CropFaceDataset(X_test, y_test)

    train_loader = DataLoader(train_set, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory,
                              collate_fn=collate_pil)
    valid_loader = DataLoader(valid_set, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory,
                              collate_fn=collate_pil)
    test_loader = DataLoader(test_set, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory,
                             collate_fn=collate_pil)

    return train_loader, valid_loader, test_loader, dataset.class_to_idx


def create_cropped_face_dataset(mtcnn,
                                size,
                                batch_size: int,
                                num_workers=4,
                                pin_memory=False):
    dest_folder = os.path.join(PROCESSED_DATA_PATH, 'cropped')
    src_folder = RAW_DATA_PATH

    dataset = datasets.ImageFolder(src_folder, transform=transforms.Resize((size, size)))
    dataset.samples = [(p, p.replace(src_folder, dest_folder)) for p, _ in dataset.samples]

    loader = DataLoader(dataset,
                        batch_size=1,
                        num_workers=num_workers,
                        pin_memory=pin_memory,
                        collate_fn=collate_pil)

    for i, (x, y) in enumerate(loader):
        print('\rImages processed: {:8d} of {:8d}'.format(i + 1, len(loader)), end='')
        mtcnn(x, save_path=y)
        print((x, y))


def collate_pil(x):
    out_x, out_y = [], []
    for xx, yy in x:
        out_x.append(xx)
        out_y.append(yy)
    return out_x, out_y


In [0]:
from facenet_pytorch import MTCNN

cfg = Config()
mtcnn = MTCNN(
    image_size=cfg.image_size,
    margin=cfg.margin,
    min_face_size=cfg.min_face_size,
    thresholds=cfg.threshold,
    factor=cfg.factor,
    # prewhiten=cfg.prewhiten,
    keep_all=True,
    device=cfg.device)

create_cropped_face_dataset(mtcnn,
                            160,
                            cfg.batch_size,
                            cfg.num_workers,
                            cfg.pin_memory)

del mtcnn

Images processed:        1 of     1110([<PIL.Image.Image image mode=RGB size=160x160 at 0x7FA4935024E0>], ['/content/celebrity-face-recognition/data/processed/cropped/Bang Kieu/Bang Kieu_1.png'])
Images processed:        2 of     1110([<PIL.Image.Image image mode=RGB size=160x160 at 0x7FA493502940>], ['/content/celebrity-face-recognition/data/processed/cropped/Bang Kieu/Bang Kieu_10.png'])
Images processed:        3 of     1110([<PIL.Image.Image image mode=RGB size=160x160 at 0x7FA493502208>], ['/content/celebrity-face-recognition/data/processed/cropped/Bang Kieu/Bang Kieu_11.png'])
Images processed:        4 of     1110([<PIL.Image.Image image mode=RGB size=160x160 at 0x7FA493502198>], ['/content/celebrity-face-recognition/data/processed/cropped/Bang Kieu/Bang Kieu_12.png'])
Images processed:        5 of     1110([<PIL.Image.Image image mode=RGB size=160x160 at 0x7FA493502208>], ['/content/celebrity-face-recognition/data/processed/cropped/Bang Kieu/Bang Kieu_13.png'])
Images proce

  "Palette images with Transparency expressed in bytes should be "


([<PIL.Image.Image image mode=RGB size=160x160 at 0x7FA493502550>], ['/content/celebrity-face-recognition/data/processed/cropped/Nguyen Cong Phuong/Nguyen Cong Phuong_15.PNG'])
Images processed:      613 of     1110([<PIL.Image.Image image mode=RGB size=160x160 at 0x7FA493502FD0>], ['/content/celebrity-face-recognition/data/processed/cropped/Nguyen Cong Phuong/Nguyen Cong Phuong_16.png'])
Images processed:      614 of     1110([<PIL.Image.Image image mode=RGB size=160x160 at 0x7FA493502390>], ['/content/celebrity-face-recognition/data/processed/cropped/Nguyen Cong Phuong/Nguyen Cong Phuong_17.png'])
Images processed:      615 of     1110([<PIL.Image.Image image mode=RGB size=160x160 at 0x7FA493502F60>], ['/content/celebrity-face-recognition/data/processed/cropped/Nguyen Cong Phuong/Nguyen Cong Phuong_18.png'])
Images processed:      616 of     1110([<PIL.Image.Image image mode=RGB size=160x160 at 0x7FA493502278>], ['/content/celebrity-face-recognition/data/processed/cropped/Nguyen 

In [0]:
!cp -r -f data /content/drive/My\ Drive/celebrity-face-recognition/