# Import library


In [13]:
import torch
import math
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import random
import torchvision

from sklearn.model_selection import train_test_split
from typing import Any
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.tensorboard import SummaryWriter
from PIL import Image
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from IPython.display import clear_output

# Setup device and tensorboard


In [14]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)

<torch._C.Generator at 0x280e4bca2d0>

# Customized Dataset


In [15]:
class ImageDataset(Dataset):
    def __init__(self, list_root_dir: list, transform=None) -> None:
        super().__init__()
        self.list_root_dir = list_root_dir
        self.transform = transform

        self.image_paths = []
        for root_dir in list_root_dir:
            self.image_paths.extend(
                glob.glob(os.path.join(root_dir, "with", "*.jpg")))
            self.image_paths.extend(
                glob.glob(os.path.join(root_dir, "without", "*.jpg")))
        random.shuffle(self.image_paths)

    def __len__(self) -> int:
        return len(self.image_paths)

    def __getitem__(self, index) -> Any:
        image_path = self.image_paths[index]

        label = 1 if image_path.split(os.sep)[-2] == "with" else 0
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        return [image, label]

# Train one epoch


In [16]:
def train_model(model, loss_function, optimizer, train_loader, writer, epoch=1):
    model.train()
    running_loss = 0.0
    total_samples = 0
    y_true = []
    y_pred = []
    for data in train_loader:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        # Clear gradient
        optimizer.zero_grad()
        # Calculate logits
        outputs = model(images)
        # Calculate loss
        loss = loss_function(outputs, labels)
        # Calculate gradient from loss
        loss.backward()
        # Update weight
        optimizer.step()

        # Calculate loss
        running_loss += loss.item() * images.size(0)
        # Calculate total sample in data_loader
        total_samples += images.size(0)
        # Calculte y_predict for evaluation
        predicted = torch.argmax(outputs, dim=1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.detach().cpu().numpy())
    loss = running_loss/total_samples
    accuracy = accuracy_score(y_true, y_pred)
    writer.add_scalar("train/loss", loss, epoch)
    writer.add_scalar("train/accuracy", accuracy, epoch)
    return loss, accuracy

# Eval model


In [17]:
def eval_model(model, loss_function, test_dataloader, writer, epoch):
    model.eval()
    total_samples = 0
    total_loss = 0
    y_true = []
    y_pred = []
    with torch.no_grad():
        for images, labels in test_dataloader:
            images = images.to(device)
            labels = labels.to(device)

            # Calculate logits
            outputs = model(images)

            # Calculate loss of outputs and y_true
            loss = loss_function(outputs, labels)
            total_loss += loss.item() * images.size(0)
            # Calculate total sample
            total_samples += images.size(0)

            # Calculte y_predict for evaluation
            predicted = torch.argmax(outputs, dim=1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.detach().cpu().numpy())
    loss = total_loss/total_samples
    accuracy = accuracy_score(y_true, y_pred)
    writer.add_scalar("test/loss", loss, epoch)
    writer.add_scalar("test/accuracy", accuracy, epoch)
    return loss, accuracy

# Train


In [18]:
model_name_list = ["resnet34"]

for model_name in model_name_list:
    # Prepare model
    model = getattr(torchvision.models, model_name)(num_classes=2)
    model = model.to(device)

    # Prepare optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Prepare loss
    loss_function = torch.nn.CrossEntropyLoss()

    # Prepare data
    list_root_dir = []
    list_root_dir.append(os.path.join(os.path.dirname(os.getcwd()), "dataset", "gan_makeup_data_96"))
    list_root_dir.append(os.path.join(os.path.dirname(os.getcwd()), "dataset", "mtdataset_96"))
    list_root_dir.append(os.path.join(os.path.dirname(
        os.getcwd()), "dataset", "data_anh_Vinh"))

    train_transform = torchvision.transforms.Compose([
        torchvision.transforms.RandomResizedCrop([96, 96]),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomVerticalFlip(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    test_transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize([96, 96]),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    writer = SummaryWriter(f"logs/{model_name}")
    epoch = 70

    # Perform train-test split
    train_dataset = ImageDataset(list_root_dir, train_transform)
    test_dataset = ImageDataset(list_root_dir, test_transform)

    indices = list(range(len(train_dataset)))
    train_indices, test_indices = train_test_split(indices, test_size=0.1, train_size=0.9)
    train_dataset = torch.utils.data.Subset(train_dataset, train_indices)
    test_dataset = torch.utils.data.Subset(test_dataset, test_indices)

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

    # Keep track best result
    lowest_train_loss = math.inf
    highest_train_acc = 0
    lowest_test_loss = math.inf
    highest_test_acc = 0
    for i in range(epoch):
        # Train phase
        train_loss, train_acc = train_model(model, loss_function, optimizer, train_loader, writer, i)
        
        # Test phase
        test_loss, test_acc = eval_model(model, loss_function, test_loader, writer, i)
        
        # Command line log
        print(f'''epoch {i}: train_loss {round(train_loss,4)}, train_acc {round(train_acc,4)}, test_loss {test_loss}, test_acc {round(test_acc,4)}''')

        # Save the best model
        if train_loss <= lowest_train_loss and train_acc >= highest_train_acc and test_loss <= lowest_test_loss and test_acc >= highest_test_acc:
            lowest_train_loss = train_loss
            highest_train_acc = train_acc
            lowest_test_loss = test_loss
            highest_test_acc = test_acc
            saved_folder = os.path.join(os.getcwd(),f"{model_name}")
            if not os.path.exists(saved_folder):
                os.makedirs(saved_folder)
            saved_path = os.path.join(saved_folder, f"{i}.pth")
            torch.save(model.state_dict(), saved_path)

    # Delete everything
    model = None
    optimizer = None
    loss_function = None
    train_dataset = None
    test_dataset = None
    train_loader = None
    test_loader = None
    writer.flush()
    writer.close()
    clear_output(wait=True)

epoch 0: train_loss 1.9052, train_acc 0.5272, test_loss 0.7178971049336672, test_acc 0.6545
epoch 1: train_loss 1.0563, train_acc 0.5467, test_loss 0.9331698882950289, test_acc 0.6097
epoch 2: train_loss 0.954, train_acc 0.5875, test_loss 0.6763845640736916, test_acc 0.6554
epoch 3: train_loss 0.8364, train_acc 0.6387, test_loss 0.6167141289554087, test_acc 0.7066
epoch 4: train_loss 0.7429, train_acc 0.6806, test_loss 0.5284556391252242, test_acc 0.7587
epoch 5: train_loss 0.779, train_acc 0.6678, test_loss 0.6140241495434066, test_acc 0.6901
epoch 6: train_loss 0.7489, train_acc 0.6879, test_loss 0.5347755818305984, test_acc 0.7623
epoch 7: train_loss 0.6872, train_acc 0.7117, test_loss 0.6895363943232479, test_acc 0.6773
epoch 8: train_loss 0.804, train_acc 0.6431, test_loss 0.7220998516466326, test_acc 0.6106
epoch 9: train_loss 0.8642, train_acc 0.5912, test_loss 5.330075025122606, test_acc 0.617
epoch 10: train_loss 0.8772, train_acc 0.551, test_loss 0.6957753192245197, test_acc 