In [4]:
import pandas as pd
import numpy as np

from os import listdir
from os.path import isfile, join
from datetime import datetime

import os
import xml.etree.ElementTree as ET
from PIL import Image
from sklearn.metrics import average_precision_score
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.transforms import FiveCrop, ToTensor, Lambda, Compose, CenterCrop, Normalize


In [5]:
class PascalVOC:
    """
    Handle Pascal VOC dataset
    """
    def __init__(self, root_dir):
        """
        Summary: 
            Init the class with root dir
        Args:
            root_dir (string): path to your voc dataset
        """
        self.root_dir = root_dir
        self.img_dir =  os.path.join(root_dir, 'JPEGImages/')
        self.ann_dir = os.path.join(root_dir, 'Annotations')
        self.set_dir = os.path.join(root_dir, 'ImageSets', 'Main')
        self.cache_dir = os.path.join(root_dir, 'csvs')
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)

    def list_image_sets(self):
        """
        Summary: 
            List all the image sets from Pascal VOC. Don't bother computing
            this on the fly, just remember it. It's faster.
        """
        return [
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train',
            'tvmonitor']

    def _imgs_from_category(self, cat_name, dataset):
        """
        Summary: 
        Args:
            cat_name (string): Category name as a string (from list_image_sets())
            dataset (string): "train", "val", "train_val", or "test" (if available)
        Returns:
            pandas dataframe: pandas DataFrame of all filenames from that category
        """
        filename = os.path.join(self.set_dir, cat_name + "_" + dataset + ".txt")
        df = pd.read_csv(
            filename,
            delim_whitespace=True,
            header=None,
            names=['filename', cat_name])
        return df

    def imgs_from_category_as_list(self, cat_name, dataset):
        """
        Summary: 
            Get a list of filenames for images in a particular category
            as a list rather than a pandas dataframe.
        Args:
            cat_name (string): Category name as a string (from list_image_sets())
            dataset (string): "train", "val", "train_val", or "test" (if available)
        Returns:
            list of srings: all filenames from that category
        """
        df = self._imgs_from_category(cat_name, dataset)
#         df = df[df['true'] == 1]
        return df


In [6]:
class HandsomeBinderNet(Dataset):
#     def __init__(self, img_root, ins_label_pairs , crop_size, transform=None):
    def __init__(self, img_root, classes, pvc, dataset_type, transform=None):
        
        """
        
        img_root: contains the path to the image root folder
        ins_label_pairs: instance label pair that contains a list of all the image path names and their respective labels
        crop_size: contains desired crop dimensions
        transform: contains the transformation procedures to be applied. defaulted to be None
        
        """
        
        self.img_root = img_root
        self.transform = transform
        self.classes = classes
        self.pvc = pvc
        self.dataset_type = dataset_type
        self.ins_label_pairs = self.instance_label_prep(self.classes, self.pvc, self.dataset_type)
  
    def __len__(self):
        return len(self.ins_label_pairs)
    
    def image_load(self, image_path):
        # Open image and load
        img = (Image.open(image_path))
        img.load()
        
        img = np.array(img)
        if len(img.shape) == 2:
            img = np.expand_dims(img, 2)
            img = np.repeat(img, 3, 2)
            
        return Image.fromarray(img)
        
    def __getitem__(self, index):
        # Path to the image
        image_path = self.img_root + self.ins_label_pairs[index][0] + '.jpg'
        
        # Open the image
        image = self.image_load(image_path)
        label = torch.from_numpy((self.ins_label_pairs[index][1]).astype(float))
        
        if self.transform is not None:
            image = self.transform(image)
        
        return [image, label]
    
    
    def instance_label_prep(self, classes, pvc, dataset_type):
        
        """

        classes: a list containing the classes used
        pvc: pascalVOC object
        dataset_type: train, trainval or val

        """

        # Get a dataframe from within the pascalVOC dataset. It will be in a one hot encoding fashion
        final_df = None

        # Loop through each different class to get each image's classes
        for index, x in enumerate(classes):
            cat_name = x # category name

            df = pvc.imgs_from_category_as_list(cat_name, dataset_type)
            df[x] = df[x].replace(-1, 0)

            # For the first category, we use its dataframe as the base
            if index == 0:
                final_df = df
            # And merge with the rest of the following categories
            else:        
                final_df = final_df.merge(df, on='filename', how='inner')

        # Here we get each image name and their respective labels (one hot encoding format) and store in a list
        df_np = final_df.to_numpy()

        ins_labels = []
        for x in df_np:
            ins_labels.append([x[0], x[1:]])

        return ins_labels
    

In [7]:
class_list = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
            'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']

pvc = PascalVOC('VOC2012')
imgpath = 'VOC2012/JPEGImages/'

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')    
tr = transforms.Compose([transforms.RandomResizedCrop(300),
                             transforms.ToTensor(),
                             transforms.Normalize([0.4589, 0.4355, 0.4032],[0.2239, 0.2186, 0.2206])])

# Create datasets and respective dataloaders

train_dataset = HandsomeBinderNet(imgpath, class_list, pvc, 'train', transform=tr)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

val_dataset = HandsomeBinderNet(imgpath, class_list, pvc, 'trainval', transform=tr)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=True)

test_dataset = HandsomeBinderNet(imgpath, class_list, pvc, 'val', transform=tr)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=True)


In [8]:
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(512, 20)
model.to(device)
optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
loss_fun = nn.BCEWithLogitsLoss()

In [9]:
def train_epoch(model, device, train_loader, optimizer, num_epochs, val_loader, loss_fun):
    train_losses = []
    val_losses = []
    val_ap_scores = []
    
    directory = 'model_' + str(datetime.now())
    if not os.path.exists(directory):
        os.makedirs(directory)

    for epoch in range(1, num_epochs + 1):
        train_loss = train(model, device, train_loader, optimizer, epoch, loss_fun)
        val_loss, val_ap_score = validate(model, device, val_loader, epoch, loss_fun)

        if (len(val_losses) > 0) and (val_loss < min(val_losses)):
            torch.save(model.state_dict(), directory + '/resnet18_model_{}.pt'.format(epoch))
            print("Saving model (epoch {}) with lowest validation loss: {}".format(epoch, val_loss))

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        val_ap_scores.append(val_ap_score)

    print("Training and validation complete.")
    return train_losses, val_losses, val_ap_scores

In [10]:
def train(model, device, train_loader, optimizer, epoch, loss_fun):
    model.train()
    train_losses = []
    for idx, batch in enumerate(train_loader):
        data = batch[0].to(device)
        target = batch[1].to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fun(output, target)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())
        if idx % 5 == 0:
            print('Epoch: {}, Training_Samples: {}/{}, Loss: {}'.format(epoch, idx, len(train_loader), loss.item()))
    train_loss = torch.mean(torch.tensor(train_losses))
    print('\nEpoch: {}'.format(epoch))
    print('Training set: Average loss: {:.4f}'.format(train_loss))
    return train_loss

In [11]:
def validate(model, device, val_loader, epoch, loss_fun):
    model.eval()
    val_loss = 0
    correct = 0
    
    with torch.no_grad():
        for idx, batch in enumerate(val_loader):
            data = batch[0].to(device)
            target = batch[1].to(device)
            output = model(data)
            
            # compute the batch loss
            batch_loss = loss_fun(output, target).item()
            val_loss += batch_loss
            pred = torch.sigmoid(output)
            
            if idx == 0:
                predictions = pred
                targets = target
            else:
                predictions = torch.cat((predictions, pred))
                targets = torch.cat((targets, target))
            if idx % 5 == 0:
                print('Epoch: {}, Validation_Samples: {}/{}, Loss: {}'.format(epoch, idx, len(val_loader), batch_loss))

    val_loss /= len(val_loader)
    print('\nEpoch: {}'.format(epoch))
    print('Validation set: Average loss: {:.4f}, AP: {:.4f})'.format(val_loss, 
                                                                     average_precision_score(targets.reshape(-1, 20).cpu(), 
                                                                                             predictions.reshape(-1, 20).cpu())))
    
    return val_loss, predictions, targets

In [12]:
start = time.time()
train_losses, val_losses, val_accuracies = train_epoch(model, device, train_loader, optimizer, 10, val_loader, loss_fun)

print(time.time() - start)

OSError: [WinError 123] The filename, directory name, or volume label syntax is incorrect: 'model_2020-03-17 00:20:10.290689'