In [12]:
import matplotlib
import matplotlib.pyplot as plt

In [13]:
import os
import shutil
from bing_image_downloader.bing_image_downloader import downloader


# Root folders of the final dataset: one for the training split, one for the test split.
directory_list = [
    './final_asian_female/train/',
    './final_asian_female/test/',
]

# make sure both split folders exist before any images are moved into them
for split_dir in directory_list:
    os.makedirs(split_dir, exist_ok=True)

# helper: split one downloaded class folder into train/test sets
def dataset_split(query, train_cnt, source_root='.', output_root='./final_asian_female'):
    """Move the downloaded images of one class into train and test folders.

    The images are expected in ``<source_root>/<query>``. The first
    ``train_cnt`` entries (in sorted file-name order) are moved to
    ``<output_root>/train/<query>``, every remaining entry is moved to
    ``<output_root>/test/<query>``, and the emptied source folder is removed.

    Args:
        query: Name of the class; used as the source folder name and as the
            sub-folder name inside both split folders.
        train_cnt: Number of entries that go to the training split.
        source_root: Folder that contains the ``query`` folder. The default
            keeps the original behaviour of reading from the working directory.
        output_root: Folder that contains the ``train`` and ``test`` folders.
            The default matches the dataset root used by the rest of the
            notebook.

    Raises:
        OSError: Propagated from ``os.listdir`` when the source folder does
            not exist or is not a directory.
    """
    source_dir = os.path.join(source_root, query)

    # List first: a missing download folder then fails before any (empty)
    # class folders are created in the dataset tree.
    # os.listdir() returns entries in arbitrary order, so sort to make the
    # train/test split reproducible between runs and machines.
    file_names = sorted(os.listdir(source_dir))

    train_dir = os.path.join(output_root, 'train', query)
    test_dir = os.path.join(output_root, 'test', query)
    for target_dir in (train_dir, test_dir):
        os.makedirs(target_dir, exist_ok=True)

    for cnt, file_name in enumerate(file_names):
        if cnt < train_cnt:
            print(f'[Train Dataset] {file_name}')
            target_dir = train_dir
        else:
            print(f'[Test Dataset] {file_name}')
            target_dir = test_dir
        shutil.move(os.path.join(source_dir, file_name), os.path.join(target_dir, file_name))

    # everything has been moved out, drop the now-empty download folder
    shutil.rmtree(source_dir)

In [14]:
# data for Jennie
query = 'Jennie Blackpink'
# Request up to 160 images; dataset_split() then reads them from the folder
# named after the query (presumably where the downloader writes them - confirm)
# and sends the first 120 to the train split, the rest to the test split.
downloader.download(
    query,
    limit=160,
    output_dir='./',
    adult_filter_off=True,
    force_replace=False,
    timeout=60,
)
dataset_split(query, 120)

[Info] Indexing page: 1
[Info] Indexed 18 Images on Page 1.
#1 Downloading image (https://img1.kpopmap.com/2019/09/JeenieKim-grid.jpg)
#1 The file has been successfully downloaded.
#2 Downloading image (http://philnews.ph/wp-content/uploads/2019/05/jennie.jpg)
#2 The file has been successfully downloaded.
#3 Downloading image (https://1409791524.rsc.cdn77.org/data/images/full/588113/blackpink-jennie.jpeg?w=539&amp;h=397)
#3 The file has been successfully downloaded.
#4 Downloading image (https://kt.wowkorea.jp/img/album/45/228352/299051.jpg)
#4 The file has been successfully downloaded.
#5 Downloading image (https://i.pinimg.com/736x/92/46/eb/9246ebbff26c48825597a4bad69587f6.jpg)
#5 The file has been successfully downloaded.
#6 Downloading image (https://1409791524.rsc.cdn77.org/data/images/full/574622/blackpink-jennie-between-chic-and-lovely-fascinating-visuals.jpeg?w=650)
#6 The file has been successfully downloaded.
#7 Downloading image (http://img2.yna.co.kr/etc/inner/EN/2018/10/18

KeyboardInterrupt: 

In [None]:
# data for Rosé
query = 'Rosé Blackpink'
# Request up to 160 images; dataset_split() then reads them from the folder
# named after the query (presumably where the downloader writes them - confirm)
# and sends the first 120 to the train split, the rest to the test split.
downloader.download(
    query,
    limit=160,
    output_dir='./',
    adult_filter_off=True,
    force_replace=False,
    timeout=60,
)
dataset_split(query, 120)

In [None]:
# data for Jisoo
query = 'Jisoo Blackpink'
# Request up to 160 images; dataset_split() then reads them from the folder
# named after the query (presumably where the downloader writes them - confirm)
# and sends the first 120 to the train split, the rest to the test split.
downloader.download(
    query,
    limit=160,
    output_dir='./',
    adult_filter_off=True,
    force_replace=False,
    timeout=60,
)
dataset_split(query, 120)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

import torchvision
from torchvision import datasets, models, transforms

import numpy as np
import time


# run on the first CUDA device when one is available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# per-channel mean / std handed to Normalize in both pipelines
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# training pipeline: resize, random horizontal flip (augmentation),
# tensor conversion, normalization
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# test pipeline: same steps as training but without the random flip
transforms_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# one ImageFolder per split, each reading from its own sub-folder of data_dir
data_dir = './final_asian_female'
train_datasets = datasets.ImageFolder(os.path.join(data_dir, 'train'), transforms_train)
test_datasets = datasets.ImageFolder(os.path.join(data_dir, 'test'), transforms_test)

# both loaders share the same batching options
loader_options = dict(batch_size=32, shuffle=True, num_workers=4)
train_dataloader = torch.utils.data.DataLoader(train_datasets, **loader_options)
test_dataloader = torch.utils.data.DataLoader(test_datasets, **loader_options)

print('size of training dataset:', len(train_datasets))
print('size of validating dataset:', len(test_datasets))

# class names as reported by the training ImageFolder
class_names = train_datasets.classes
print('class:', class_names)

In [None]:
def imshow(input, title):
    """Undo the input normalization of an image tensor and display it.

    Args:
        input: Tensor whose axes are reordered with ``transpose((1, 2, 0))``
            before plotting, i.e. channels come first. It may track gradients
            or live on a GPU; it is detached and copied to the CPU first.
        title: Title passed to ``plt.title``.
    """
    # .numpy() raises for tensors that require grad or are stored on a GPU,
    # so detach and move to the CPU before converting.
    image = input.detach().cpu().numpy().transpose((1, 2, 0))

    # invert Normalize(mean, std): x * std + mean, then clamp to the
    # displayable [0, 1] range
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image = np.clip(std * image + mean, 0, 1)

    # print image
    plt.imshow(image)
    plt.title(title)
    plt.show()


# pull a single batch out of the training loader
iterator = iter(train_dataloader)
inputs, classes = next(iterator)

# tile the batch into one grid image and use the class names as its title
batch_titles = [class_names[label] for label in classes]
out = torchvision.utils.make_grid(inputs)
imshow(out, title=batch_titles)

In [None]:
# start from a pretrained ResNet-34
model = models.resnet34(pretrained=True)
num_features = model.fc.in_features
# transfer learning: replace the final fully connected layer with one sized to
# the classes found in the training folder, so the head stays correct when the
# number of class folders changes (it was hard-coded to 3 before)
model.fc = nn.Linear(num_features, len(class_names))
model = model.to(device)

# loss and optimizer; the optimizer updates every parameter of the network,
# not only the new head
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [None]:
num_epochs = 10
start_time = time.time()

# repeat the number of epoch
for epoch in range(num_epochs):
    # ---- training phase ----
    # Switch back to training mode at the start of EVERY epoch: the validation
    # phase below puts the model in eval mode, and a single model.train() call
    # before the loop would leave every epoch after the first training in eval
    # mode.
    model.train()
    train_loss = 0.
    train_corrects = 0

    # call training data in batches
    for inputs, labels in train_dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        # the criterion returns the batch mean, so weight it by the batch size
        train_loss += loss.item() * inputs.size(0)
        # .item() keeps the counter a plain Python int instead of a tensor
        train_corrects += torch.sum(preds == labels).item()

    epoch_loss = train_loss / len(train_datasets)
    epoch_acc = train_corrects / len(train_datasets) * 100.

    # ---- validation phase ----
    model.eval()
    valid_loss = 0.
    valid_corrects = 0

    # no gradients are needed for evaluation; skipping autograd saves memory
    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            valid_loss += loss.item() * inputs.size(0)
            valid_corrects += torch.sum(preds == labels).item()

    epoch_loss2 = valid_loss / len(test_datasets)
    epoch_acc2 = valid_corrects / len(test_datasets) * 100.
    # print the result
    print('#{} Loss: {:.4f} Valid Loss: {:.4f} Acc: {:.4f}% Valid Acc: {:.4f}% Time: {:.4f}s'.format(epoch, epoch_loss, epoch_loss2, epoch_acc, epoch_acc2, time.time() - start_time))

In [None]:
# print some results
# `inputs`, `labels` and `preds` still hold the last validation batch processed
# by the training cell. That batch can be shorter than a full batch, so only
# show the sample positions that actually exist instead of raising IndexError.
sample_indices = [i for i in (2, 9, 10) if i < len(preds)]
for i in sample_indices:
    predicted_name = class_names[preds[i]]
    actual_name = class_names[labels[i]]
    print(f'[Prediction: {predicted_name}] (Actual: {actual_name})')
    # copy only the displayed image to the CPU rather than the whole batch
    imshow(inputs[i].cpu(), title='Prediction: ' + predicted_name)