In [1]:
from google.colab import drive
drive.mount('/content/drive/')
root = '/content/drive/My Drive/Colab Notebooks/dog-breed-recognition'

Mounted at /content/drive/


In [3]:
import os
import sys
import tqdm
import random
import copy

from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt

In [5]:
import torch
import torchvision
from torchvision import transforms

# Remember to activate GPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

if torch.cuda.is_available():
  torch.cuda.get_device_name(0)

'Tesla T4'

In [6]:
def get_dataset_split_labels(dataset_path, split_ratio):
  train_ratio, val_ratio, test_ratio = split_ratio

  dataset_labels = { 'train': [], 'val': [], 'test': [] }

  classes = sorted(os.listdir(dataset_path))
  for i_class, curr_class in enumerate(classes):
    class_path = os.path.join(dataset_path, curr_class)
    instances = sorted(os.listdir(class_path))
    n_instances = len(instances)

    labels = [(i_class, label) for label in list(range(n_instances))]
    random.shuffle(labels)

    train_l = int(n_instances * train_ratio)
    val_l = int(n_instances * val_ratio)
    test_l = int(n_instances * test_ratio)

    curr_train_labels = labels[:train_l]
    curr_val_labels = labels[train_l:train_l + val_l]
    curr_test_labels = labels[train_l + val_l:train_l + val_l + test_l]
    
    dataset_labels['train'] += curr_train_labels
    dataset_labels['val'] += curr_val_labels
    dataset_labels['test'] += curr_test_labels

  return dataset_labels

In [7]:
class ImageDataset(torch.utils.data.Dataset):
  def __init__(self, dataset_path, labels, transform):
    self.dataset_path = dataset_path
    self.labels = labels
    self.transform = transform

    classes = sorted(os.listdir(self.dataset_path))
    self.n_classes = len(classes)

    self.classes_path = [os.path.join(self.dataset_path, c) for c in classes]
    self.instances_path = [[os.path.join(class_path, instance)
        for instance in sorted(os.listdir(class_path))]
      for class_path in self.classes_path]

  def __getitem__(self, index):
    # anc = anchor, pos = positive, neg = negative
    anc_class_index, anc_instance_index = self.labels[index]
    
    pos_class_index = anc_class_index
    pos_instance_index = random.choice([instance_index
        for instance_index in range(len(self.instances_path[pos_class_index]))
        if instance_index != anc_instance_index])
    
    neg_class_index = random.choice([class_index
        for class_index in range(len(self.classes_path))
        if class_index != anc_class_index])
    neg_instance_index = random.choice(
        range(len(self.instances_path[neg_class_index])))
    
    anc_img_path = self.instances_path[anc_class_index][anc_instance_index]
    pos_img_path = self.instances_path[pos_class_index][pos_instance_index]
    neg_img_path = self.instances_path[neg_class_index][neg_instance_index]
    
    anc_img = Image.open(anc_img_path).convert('RGB')
    pos_img = Image.open(pos_img_path).convert('RGB')
    neg_img = Image.open(neg_img_path).convert('RGB')

    anc_x = self.transform(anc_img)
    pos_x = self.transform(pos_img)
    neg_x = self.transform(neg_img)

    return anc_x, pos_x, neg_x

  def __len__(self):
    return len(self.labels)

In [8]:
def embedder_model(n_embeddings):
  x = torchvision.models.resnet50(pretrained=True)
  x.fc = torch.nn.Sequential(
      torch.nn.Linear(2048, n_embeddings),
      torch.nn.Sigmoid())

  return x

In [9]:
def train(model, criterion, optimizer, scheduler, n_epochs):
  best_weights = copy.deepcopy(model.state_dict())
  best_acc = 0.0

  for epoch in range(n_epochs):
    for phase in ['train', 'val']:
      if phase == 'train':
        model.train()
      else:
        model.eval()

      epoch_loss = 0.0
      epoch_acc = 0.0
      n_seen_samples = 0

      dataloader = tqdm.tqdm(dataloaders[phase], total=len(dataloaders[phase]),
          position=0, leave=True)
      for anc_x, pos_x, neg_x in dataloader:
        curr_batch_size = anc_x.shape[0]
        n_seen_samples += curr_batch_size

        optimizer.zero_grad()

        anc_x = anc_x.to(device)
        pos_x = pos_x.to(device)
        neg_x = neg_x.to(device)

        anc_y = model(anc_x)
        pos_y = model(pos_x)
        neg_y = model(neg_x)

        loss = criterion(anc_y, pos_y, neg_y)

        if phase == 'train':
          loss.backward()
          optimizer.step()

        epoch_loss += loss.item() * curr_batch_size

        anc_pos_dists = (anc_y - pos_y).pow(2).sum(1)
        anc_neg_dists = (anc_y - neg_y).pow(2).sum(1)
        epoch_acc += torch.sum(anc_pos_dists + 1 < anc_neg_dists).item()
        
        curr_loss = epoch_loss / n_seen_samples
        curr_acc = epoch_acc / n_seen_samples

        dataloader.set_postfix(Epoch='%s/%s' % (epoch+1, n_epochs),
            Loss=curr_loss, Acc=curr_acc, refresh=True)
        
      if phase == 'train':
        scheduler.step()
      
      epoch_loss /= len(datasets[phase])
      epoch_acc /= len(datasets[phase])
    
      if phase == 'val' and epoch_acc > best_acc:
        best_acc = epoch_acc
        best_weights = copy.deepcopy(model.state_dict())
  
  model.load_state_dict(best_weights)

  return model, best_acc

In [10]:
def test(model):
  model.eval()
  acc = 0.0
  n_seen_samples = 0

  dataloader = tqdm.tqdm(dataloaders['test'], total=len(dataloaders['test']),
      position=0, leave=True)
  for anc_x, pos_x, neg_x in dataloader:
    curr_batch_size = anc_x.shape[0]
    n_seen_samples += curr_batch_size

    optimizer.zero_grad()

    anc_x = anc_x.to(device)
    pos_x = pos_x.to(device)
    neg_x = neg_x.to(device)

    anc_y = model(anc_x)
    pos_y = model(pos_x)
    neg_y = model(neg_x)

    anc_pos_dists = (anc_y - pos_y).pow(2).sum(1)
    anc_neg_dists = (anc_y - neg_y).pow(2).sum(1)
    acc += torch.sum(anc_pos_dists + 1 < anc_neg_dists).item()

    curr_acc = acc / n_seen_samples

    dataloader.set_postfix(Acc=curr_acc, refresh=True)

  acc / len(datasets['test'])

  return acc

In [11]:
dataset_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    'val': transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) }

In [12]:
dataset_path = os.path.join(root, 'dogs', 'train')

split_ratio = [0.7, 0.15, 0.15]

dataset_labels = get_dataset_split_labels(dataset_path, split_ratio)

datasets = {
    'train': ImageDataset(dataset_path, dataset_labels['train'],
        dataset_transforms['train']),
    'val': ImageDataset(dataset_path, dataset_labels['val'],
        dataset_transforms['val']),
    'test': ImageDataset(dataset_path, dataset_labels['test'],
        dataset_transforms['test']) }

n_embeddings = 128

n_epochs = 5

batch_size = 8

n_workers = 8

model = embedder_model(n_embeddings)
model = torch.jit.script(model).to(device)

criterion = torch.nn.TripletMarginLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

dataloaders = {
    'train': torch.utils.data.DataLoader(datasets['train'],
        batch_size=batch_size, num_workers=n_workers, shuffle=True),
    'val': torch.utils.data.DataLoader(datasets['val'], batch_size=batch_size,
        num_workers=n_workers, shuffle=True),
    'test': torch.utils.data.DataLoader(datasets['test'], batch_size=batch_size,
        num_workers=n_workers, shuffle=True) }

trained_model_ckpt_path = os.path.join(root, 'models', 'embedder.pth')

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))




In [13]:
trained_model, val_acc = train(model, criterion, optimizer, scheduler, n_epochs)
torch.save({
    'state_dict': trained_model.state_dict(),
    'acc': val_acc,
    'n_embeddings': n_embeddings }, trained_model_ckpt_path)

100%|██████████| 1513/1513 [11:05<00:00,  2.27it/s, Acc=0.726, Epoch=1/5, Loss=0.383]
100%|██████████| 319/319 [00:50<00:00,  6.32it/s, Acc=0.893, Epoch=1/5, Loss=0.197]
100%|██████████| 1513/1513 [07:25<00:00,  3.40it/s, Acc=0.842, Epoch=2/5, Loss=0.291]
100%|██████████| 319/319 [00:45<00:00,  6.98it/s, Acc=0.914, Epoch=2/5, Loss=0.176]
100%|██████████| 1513/1513 [07:23<00:00,  3.41it/s, Acc=0.857, Epoch=3/5, Loss=0.279]
100%|██████████| 319/319 [00:44<00:00,  7.11it/s, Acc=0.919, Epoch=3/5, Loss=0.163]
100%|██████████| 1513/1513 [07:22<00:00,  3.42it/s, Acc=0.866, Epoch=4/5, Loss=0.26]
100%|██████████| 319/319 [00:45<00:00,  7.06it/s, Acc=0.923, Epoch=4/5, Loss=0.157]
100%|██████████| 1513/1513 [07:22<00:00,  3.42it/s, Acc=0.863, Epoch=5/5, Loss=0.267]
100%|██████████| 319/319 [00:45<00:00,  7.09it/s, Acc=0.917, Epoch=5/5, Loss=0.167]


In [14]:
trained_model = embedder_model(n_embeddings)
trained_model.load_state_dict(torch.load(trained_model_ckpt_path)['state_dict'])
trained_model.eval()
trained_model = torch.jit.script(trained_model).to(device)

test_acc = test(trained_model)

100%|██████████| 319/319 [00:58<00:00,  5.48it/s, Acc=0.944]
