# Computing initial dog embeddings from train set

This code aims to generate embeddings from the training dataset, so that they can be used to dynamically generate dog breed classifiers. Once the embeddings extraction model has been trained, this computation only needs to be performed once.

## Accessing the Dog Breed Recognition dataset

I have created a directory called "dog-breed-recognition". Inside it, I have placed a directory called "dogs", which holds the dataset itself. For training, only the samples contained in the "train" directory are used.

In [None]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem, so that the project files
#   stored there can be accessed through regular paths
drive.mount('/content/drive/')
# `root` is the project directory on Drive; the dataset, checkpoint and output
#   paths used in this notebook are all built from it
root = '/content/drive/My Drive/Colab Notebooks/dog-breed-recognition'

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


## Importing basic Python libraries

In [None]:
import os
import sys
import tqdm
import random
import copy
from six.moves import cPickle as pickle

from PIL import Image
import numpy as np

## Importing PyTorch library

For GPU usage, go to "Edit > Notebook Settings" and make sure the hardware accelerator is set to GPU.

In [None]:
import torch
import torchvision
from torchvision import transforms

# Create a PyTorch device, so that inputs, outputs and models are placed on
#   the first GPU ('cuda:0') when CUDA is available, falling back to the CPU
#   otherwise
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

## Setting up initial enrolling parameters

In [None]:
# `dataset_path` is the directory containing the dog breed images (from the
#   initial 100 classes); it is read as one sub-directory per dog breed
dataset_path = os.path.join(root, 'dogs', 'train')

# `model_ckpt_path` is the path to the file containing the weights of the
#   embeddings extraction model
model_ckpt_path = os.path.join(root, 'models', 'embedder.pth')

# `initial_enroll_path` is the path where the embeddings of the set of images
#   representing the initial 100 classes will be saved
initial_enroll_path = os.path.join(root, 'models', 'initial_enroll.pkl')

# Number of images per batch when computing the embeddings
batch_size = 32

# Number of worker processes used by the dataloader to load the images
n_workers = 8

## Setting up embedder model loading

In [None]:
def embedder_model(n_embeddings):
  '''
  Generates a CNN ResNet50-based embedder.

  The backbone is created without pretrained weights; the trained weights are
  expected to be loaded afterwards by the caller (e.g. through
  `load_state_dict`).

  Parameters
  ----------
  n_embeddings : int
    number of embeddings to be outputted

  Returns
  -------
  x : torch.nn.Module
    the model
  '''

  # First, `x` is a new ResNet50 CNN model. Calling the constructor without
  #   arguments yields randomly initialized weights on both old and new
  #   torchvision releases, and avoids the deprecation warning raised by the
  #   `pretrained` keyword on recent versions
  x = torchvision.models.resnet50()

  # Change the final fully connected layer so that the output size
  #   matches the desired `n_embeddings` size. Also, apply sigmoid
  #   function. The input size is read from the layer being replaced
  #   instead of being hard-coded
  x.fc = torch.nn.Sequential(
      torch.nn.Linear(x.fc.in_features, n_embeddings),
      torch.nn.Sigmoid())

  return x

# Read the checkpoint of the trained embedding extractor, remapping its
#   tensors to the device in use
model_ckpt = torch.load(model_ckpt_path, map_location=device)

# The checkpoint stores the embedding size the model was trained with...
n_embeddings = model_ckpt['n_embeddings']

# ...together with the trained weights
state_dict = model_ckpt['state_dict']

# Rebuild the embedder architecture and fill it with the trained weights
model = embedder_model(n_embeddings)
model.load_state_dict(state_dict)

# Switch the embedder to evaluation mode, compile it with TorchScript and move
#   the compiled module to the device in use
model = torch.jit.script(model.eval()).to(device)

## Initial dataset loading

In [None]:
class ImageDataset(torch.utils.data.Dataset):
  """
  A class to read the set of images from the initial 100 classes.

  Attributes
  ----------
  classes : list<str>
    list of classes (dog breeds) presented in the 100-classes initial dataset
  instances_path : list<str>
    list of paths of each instance of the 100-classes initial dataset
  instances_class : list<int>
    list of dog breed labels of each instance of the 100-classes initial dataset
  transform : torch.transform
    input preprocessing pipeline

  Data descriptors
  ----------------
  __getitem__
    Gets the model's input and the respective dog breed index from a dataset
      instance.

  __len__
    Gets the number of samples presented in the set of images from the initial
      100 classes.
  """

  def __init__(self, dataset_path):
    '''
    Constructs all the attributes for the image loader object.

    Parameters
    ----------
    dataset_path : str
      root of the 100-classes initial dataset
    '''

    # List the dog breeds and the instances found under `dataset_path`
    self.classes, self.instances_path, self.instances_class = (
      self._scan_dataset(dataset_path))

    # Initialize preprocessing input pipeline
    self.transform = transforms.Compose([
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.ToTensor(),
      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

  @staticmethod
  def _scan_dataset(dataset_path):
    '''
    Lists the classes and the instances stored under a dataset root.

    Parameters
    ----------
    dataset_path : str
      root of the dataset, holding one directory per class (dog breed)

    Returns
    -------
    classes : list<str>
      sorted names of the class directories
    instances_path : list<str>
      paths of all instances, grouped by class and sorted by file name
    instances_class : list<int>
      index (in `classes`) of the class of each instance
    '''

    # `dataset_path` divides the dataset in a list of directories, where each
    #   directory represent a class (dog breed). Only directories are kept, so
    #   that a stray file placed at the dataset root is neither treated as a
    #   class nor makes the listing below fail
    classes = sorted(
      entry for entry in os.listdir(dataset_path)
      if os.path.isdir(os.path.join(dataset_path, entry)))

    instances_path = []
    instances_class = []

    # Obtain the paths to all instances of the dataset, as well as their
    #   respective dog breeds' labels
    for i_class, curr_class in enumerate(classes):
      class_path = os.path.join(dataset_path, curr_class)
      candidates = (os.path.join(class_path, instance)
        for instance in sorted(os.listdir(class_path)))

      # Only regular files are kept: a sub-directory inside a class directory
      #   cannot be opened as an image
      curr_instances_path = [path for path in candidates
        if os.path.isfile(path)]

      instances_path += curr_instances_path
      instances_class += [i_class] * len(curr_instances_path)

    return classes, instances_path, instances_class

  def __getitem__(self, index):
    '''
    Gets the model's input and the respective dog breed index from a dataset
      instance.

    Parameters
    ----------
    index : int
      index of the instance to be accessed

    Returns
    -------
    x : torch.Tensor
      tensor refering to the preprocessed sample to be used as an model input
    y : int
      label refering to the dog breed index of the current instance
    '''

    # Access instance's path (`instance_path`)
    instance_path = self.instances_path[index]

    # Access instance's class (`instance_class`)
    instance_class = self.instances_class[index]

    # Read image and convert to red-green-blue channels (RGB), ensuring the
    #   input will have 3 channels. The context manager closes the underlying
    #   file as soon as the converted image (`img`) has been created
    with Image.open(instance_path) as raw_img:
      img = raw_img.convert('RGB')

    # `x` refers to the image when the preprocessing pipeline (`self.transform`)
    #   is applied to the image (`img`)
    x = self.transform(img)

    # `y` is the instance class (no changes are made)
    y = instance_class

    return x, y

  def __len__(self):
    '''
    Gets the number of samples presented in the set of images from the initial
      100 classes.

    Returns
    -------
    l : int
      the length of the dataset
    '''
    l = len(self.instances_path)

    return l

In [None]:
# Start initial embeddings and labels list
initial_embeddings = []
initial_labels = []

# Instantiate object for accessing the set of images from the 100-classes
#   dataset
dataset = ImageDataset(dataset_path)

# Create dataloader for accessing the images set in batches. The samples are
#   not shuffled, so the saved embeddings follow the dataset order
dataloader = torch.utils.data.DataLoader(dataset,
    batch_size=batch_size, shuffle=False, num_workers=n_workers)

# Using tqdm to iteratively keep track on the number of iterated batches on the
#   console
dataloader = tqdm.tqdm(dataloader, position=0, leave=True)

# Gradients are not needed for extracting embeddings: disabling them avoids
#   building the autograd graph for every batch, reducing memory usage
with torch.no_grad():

  # The dataloader is iterated, in order to access all pairs of input-output,
  #   denoted by `(x, y)`
  for x, y in dataloader:

    # Pass the input tensor to the used device (GPU or CPU)
    x = x.to(device)

    # Use the model to calculate the embeddings of the input (`embeddings`)
    embeddings = model(x)

    # Convert the embeddings to a list
    embeddings = embeddings.cpu().tolist()

    # Also convert the output to a list
    y = y.tolist()

    # Append the generated embeddings (and their respective dog breed labels)
    #   to the total list
    initial_embeddings += embeddings
    initial_labels += y

# Save generated embeddings and labels to a file
initial_enroll = {
    'embeddings': initial_embeddings,
    'labels': initial_labels,
    'classes': dataset.classes }
with open(initial_enroll_path, 'wb') as f:
  pickle.dump(initial_enroll, f)

# with open(initial_enroll_path, 'rb') as f:
#   initial_enroll = pickle.load(f)

100%|██████████| 543/543 [10:00<00:00,  1.11s/it]
