In [1]:
import os
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision

from torch.utils.data import Dataset, DataLoader, BatchSampler, random_split
from torchvision import transforms
from PIL import Image

In [2]:
from torchvision import models
backbone = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 94.1MB/s]


In [4]:
for i, layer in enumerate(backbone.children()):
  print('i: ', i)
  for j, sublayer in enumerate(layer.children()):
    print('j: ', j)
    print('sublayer: ', sublayer)

i:  0
j:  0
sublayer:  Conv2dNormActivation(
  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU6(inplace=True)
)
j:  1
sublayer:  InvertedResidual(
  (conv): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)
j:  2
sublayer:  InvertedResidual(
  (conv): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inpl

In [5]:
from google.colab import drive
drive.mount('/content/drive')
%cd drive/MyDrive/NNDL_Project

Mounted at /content/drive
/content/drive/MyDrive/NNDL_Project


In [6]:
%%capture
! unzip -o train_shuffle.zip

In [7]:
%%capture
! unzip -o test_shuffle.zip

In [8]:
# Create Dataset class for multilabel classification
class MultiClassImageDataset(Dataset):
    def __init__(self, ann_df, super_map_df, sub_map_df, img_dir, transform=None):
        self.ann_df = ann_df
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.ann_df)

    def __getitem__(self, idx):
        img_name = self.ann_df['image'][idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        super_idx = self.ann_df['superclass_index'][idx]
        super_label = self.super_map_df['class'][super_idx]

        sub_idx = self.ann_df['subclass_index'][idx]
        sub_label = self.sub_map_df['class'][sub_idx]

        if self.transform:
            image = self.transform(image)

        return image, super_idx, super_label, sub_idx, sub_label

class MultiClassImageTestDataset(Dataset):
    def __init__(self, super_map_df, sub_map_df, img_dir, transform=None):
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self): # Count files in img_dir
        return len([fname for fname in os.listdir(self.img_dir)])

    def __getitem__(self, idx):
        img_name = str(idx) + '.jpg'
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, img_name

In [9]:
train_ann_df = pd.read_csv('train_data.csv')
super_map_df = pd.read_csv('superclass_mapping.csv')
sub_map_df = pd.read_csv('subclass_mapping.csv')

train_img_dir = 'train_shuffle'
test_img_dir = 'test_shuffle'

image_preprocessing = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0), std=(1)),
])

# Create train and val split
train_dataset = MultiClassImageDataset(train_ann_df, super_map_df, sub_map_df, train_img_dir, transform=image_preprocessing)
train_dataset, val_dataset = random_split(train_dataset, [0.9, 0.1])

# Create test dataset
test_dataset = MultiClassImageTestDataset(super_map_df, sub_map_df, test_img_dir, transform=image_preprocessing)

# Create dataloaders
batch_size = 64
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True, drop_last = True)

val_loader = DataLoader(val_dataset,
                        batch_size=batch_size,
                        shuffle=True, drop_last = True)

test_loader = DataLoader(test_dataset,
                         batch_size=1,
                         shuffle=False)

In [26]:
# Simple CNN
class CNN(nn.Module):
    def __init__(self, backbone):
        super().__init__()

        self.backbone = backbone
        num_ftrs = self.backbone.classifier[1].in_features
        self.backbone.classifier[1] = torch.nn.Identity()
        for param in self.backbone.parameters():
          param.requires_grad = False

        self.fca= nn.Linear(num_ftrs, 4)
        self.fcb= nn.Linear(num_ftrs, 88)


    def forward(self, x):
        x_new = self.backbone(x)
        super_out = self.fca(x_new)
        sub_out = self.fcb(x_new)
        return super_out, sub_out

class Trainer():
    def __init__(self, model, criterion, optimizer, train_loader, val_loader, test_loader=None, device='cuda'):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.device = device
        self.superclass_probs = {i:[] for i in range(4)}
        self.subclass_probs = {i:[] for i in range(88)}

    def train_epoch(self):
        running_loss = 0.0
        device = self.device
        for i, data in enumerate(self.train_loader):
            inputs, super_labels, sub_labels = data[0].to(device), data[1].to(device), data[3].to(device)

            self.optimizer.zero_grad()
            super_outputs, sub_outputs = self.model(inputs)
            loss = self.criterion(super_outputs, super_labels) + self.criterion(sub_outputs, sub_labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f'Training loss: {running_loss/i:.3f}')

    def validate_epoch(self, append_probs=False):
        super_correct = 0
        sub_correct = 0
        total = 0
        running_loss = 0.0
        device = self.device
        with torch.no_grad():
            for i, data in enumerate(self.val_loader):
                inputs, super_labels, sub_labels = data[0].to(device), data[1].to(device), data[3].to(device)

                super_outputs, sub_outputs = self.model(inputs)
                loss = self.criterion(super_outputs, super_labels) + self.criterion(sub_outputs, sub_labels)
                _, super_predicted = torch.max(super_outputs.data, 1)
                _, sub_predicted = torch.max(sub_outputs.data, 1)

                total += super_labels.size(0)
                super_correct += (super_predicted == super_labels).sum().item()
                sub_correct += (sub_predicted == sub_labels).sum().item()
                running_loss += loss.item()

                if append_probs:
                  probs_super, classes_super = torch.max(torch.nn.functional.softmax(super_outputs.data, dim=1), dim=1)
                  for j, Class in enumerate(classes_super):
                    self.superclass_probs[Class.item()].append(probs_super[j])

                  probs_sub, classes_sub = torch.max(torch.nn.functional.softmax(sub_outputs.data, dim=1), dim=1)
                  for j, Class in enumerate(classes_sub):
                    self.subclass_probs[Class.item()].append(probs_sub[j])

            if append_probs:
              self.superclass_probs_mean = {k:torch.mean(torch.tensor(v)) for (k,v) in zip(self.superclass_probs.keys(), self.superclass_probs.values())}
              self.superclass_probs_sigma = {k:torch.std(torch.tensor(v)) for (k,v) in zip(self.superclass_probs.keys(), self.superclass_probs.values())}
              self.subclass_probs_mean = {k:torch.mean(torch.tensor(v)) for (k,v) in zip(self.subclass_probs.keys(), self.subclass_probs.values())}
              self.subclass_probs_sigma = {k:torch.std(torch.tensor(v)) for (k,v) in zip(self.subclass_probs.keys(), self.subclass_probs.values())}
              print("self.superclass_probs_mean: ", self.superclass_probs_mean)
              print("self.superclass_probs_sigma: ", self.superclass_probs_sigma)
              print("self.subclass_probs_mean: ", self.subclass_probs_mean)
              print("self.subclass_probs_sigma: ", self.subclass_probs_sigma)

        print(f'Validation loss: {running_loss/i:.3f}')
        print(f'Validation superclass acc: {100 * super_correct / total:.2f} %')
        print(f'Validation subclass acc: {100 * sub_correct / total:.2f} %')

    def test(self, save_to_csv=False, return_predictions=False):
        if not self.test_loader:
            raise NotImplementedError('test_loader not specified')



        # decide on threshold probability cuts

        z = 1.5
        mean_of_superclass_probs_mean = torch.nanmean(torch.tensor([self.superclass_probs_mean[k] for k in self.superclass_probs_mean], dtype=torch.float32))
        mean_of_superclass_probs_sigma = torch.nanmean(torch.tensor([self.superclass_probs_sigma[k] for k in self.superclass_probs_sigma], dtype=torch.float32))
        self.superclass_prob_thresholds = {}
        for i in range(4):
          if not torch.isnan(self.superclass_probs_mean[i]) and not torch.isnan(self.superclass_probs_sigma[i]):
            self.superclass_prob_thresholds[i] = self.superclass_probs_mean[i] - z*self.superclass_probs_sigma[i]
          else:
            self.superclass_prob_thresholds[i] = mean_of_superclass_probs_mean - z*mean_of_superclass_probs_sigma
        print('self.superclass_prob_thresholds: ', self.superclass_prob_thresholds)

        mean_of_subclass_probs_mean = torch.nanmean(torch.tensor([self.subclass_probs_mean[k] for k in self.subclass_probs_mean], dtype=torch.float32))
        mean_of_subclass_probs_sigma = torch.nanmean(torch.tensor([self.subclass_probs_sigma[k] for k in self.subclass_probs_sigma], dtype=torch.float32))
        self.subclass_prob_thresholds = {}
        for i in range(88):
          if not torch.isnan(self.subclass_probs_mean[i]) and not torch.isnan(self.subclass_probs_sigma[i]):
            self.subclass_prob_thresholds[i] = self.subclass_probs_mean[i] - z*self.subclass_probs_sigma[i]
          else:
            self.subclass_prob_thresholds[i] = mean_of_subclass_probs_mean - z*mean_of_subclass_probs_sigma
        print('self.subclass_prob_thresholds: ', self.subclass_prob_thresholds)



        # Evaluate on test set, in this simple demo no special care is taken for novel/unseen classes
        test_predictions = {'image': [], 'superclass_index': [], 'subclass_index': []}
        with torch.no_grad():
            for i, data in enumerate(self.test_loader):
                inputs, img_name = data[0].to(device), data[1]

                super_outputs, sub_outputs = self.model(inputs)

                # if i < 50: #uncomment to get an idea of probabilities
                #   print('softmax super: ', torch.max(torch.nn.functional.softmax(super_outputs.data, dim=1)))
                #   print('softmax sub: ', torch.max(torch.nn.functional.softmax(sub_outputs.data, dim=1)))
                #   print('')
                #   print('')

                super_predicted = (torch.max(super_outputs.data, 1)[1]).item()
                sub_predicted = (torch.max(sub_outputs.data, 1)[1]).item()
                if torch.max(torch.nn.functional.softmax(super_outputs.data, dim=1)) < self.superclass_prob_thresholds[super_predicted]:
                  super_predicted = 3 #novel
                  sub_predicted = 87 #novel
                else:
                  if torch.max(torch.nn.functional.softmax(sub_outputs.data, dim=1)) < self.subclass_prob_thresholds[sub_predicted]:
                    sub_predicted = 87 #novel
                test_predictions['subclass_index'].append(sub_predicted)
                test_predictions['superclass_index'].append(super_predicted)


                test_predictions['image'].append(img_name[0])

        test_predictions = pd.DataFrame(data=test_predictions)

        if save_to_csv:
            test_predictions.to_csv('probabilistic_superandsub_test_predictions.csv', index=False)

        if return_predictions:
            return test_predictions

In [11]:
def change_last_layer(backbone):
  for i, layer in enumerate(backbone.children()):
    if i == 0: #just train last of the main convolutional layers
      for param in layer.parameters():
        param.requires_grad = False
    else:
      for param in layer.parameters():
        param.requires_grad = True
  return


def fine_tune(backbone, cut):
  for i, layer in enumerate(backbone.children()):
    for j, sublayer in enumerate(layer.children()):
      if j < cut: #just train last of the main convolutional layers
        for param in sublayer.parameters():
          param.requires_grad = False
      else:
        for param in sublayer.parameters():
          param.requires_grad = True
    break
  return

In [24]:
backbone.classifier[1]


Linear(in_features=1280, out_features=1000, bias=True)

In [27]:
# Init model and trainer
device = 'cuda'
finetune = False
model = CNN(backbone).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
trainer = Trainer(model, criterion, optimizer, train_loader, val_loader, test_loader)

In [29]:
# Training loop
print('Trainable parameters: ', sum(p.numel() for p in model.parameters() if p.requires_grad))
print('Total parameters: ', sum(p.numel() for p in model.parameters()))
print('----------------------')
print('')
Nepochs = 25
for epoch in range(Nepochs):
    print(f'Epoch {epoch+1}')
    trainer.train_epoch()
    if epoch == Nepochs-1:
      trainer.validate_epoch(append_probs=True)
    else:
      trainer.validate_epoch(append_probs=False)
    print('')
    if epoch == 10:
      fine_tune(trainer.model.backbone, 17)
      print('')
      print('Trainable parameters: ', sum(p.numel() for p in trainer.model.parameters() if p.requires_grad))
      print('Total parameters: ', sum(p.numel() for p in trainer.model.parameters()))
      print('----------------------')
      print('')
      trainer = Trainer(trainer.model, trainer.criterion, trainer.optimizer, trainer.train_loader, trainer.val_loader, trainer.test_loader)
    elif epoch == 20:
      fine_tune(trainer.model.backbone, 16)
      print('')
      print('Trainable parameters: ', sum(p.numel() for p in trainer.model.parameters() if p.requires_grad))
      print('Total parameters: ', sum(p.numel() for p in trainer.model.parameters()))
      print('----------------------')
      print('')
      trainer = Trainer(trainer.model, trainer.criterion, trainer.optimizer, trainer.train_loader, trainer.val_loader, trainer.test_loader)


print('Finished Training')

Trainable parameters:  1323932
Total parameters:  2341724
----------------------

Epoch 1
Training loss: 2.062
Validation loss: 3.408
Validation superclass acc: 80.73 %
Validation subclass acc: 35.24 %

Epoch 2
Training loss: 1.794
Validation loss: 3.451
Validation superclass acc: 84.38 %
Validation subclass acc: 37.85 %

Epoch 3
Training loss: 1.612
Validation loss: 3.601
Validation superclass acc: 82.47 %
Validation subclass acc: 37.15 %

Epoch 4
Training loss: 1.384
Validation loss: 3.613
Validation superclass acc: 80.56 %
Validation subclass acc: 39.41 %

Epoch 5
Training loss: 1.240
Validation loss: 3.583
Validation superclass acc: 81.94 %
Validation subclass acc: 37.85 %

Epoch 6
Training loss: 1.138
Validation loss: 3.661
Validation superclass acc: 84.20 %
Validation subclass acc: 38.37 %

Epoch 7
Training loss: 1.016
Validation loss: 3.866
Validation superclass acc: 82.81 %
Validation subclass acc: 40.28 %

Epoch 8
Training loss: 0.935
Validation loss: 3.866
Validation supercla

In [30]:
trainer.model.eval()
predictions = trainer.test(save_to_csv=False, return_predictions=True)

'''
This simple baseline scores the following test accuracy

Superclass Accuracy
Overall: 43.83 %
Seen: 61.11 %
Unseen: 0.00 %

Subclass Accuracy
Overall: 2.03 %
Seen: 9.56 %
Unseen: 0.00 %
'''

self.superclass_prob_thresholds:  {0: tensor(0.7028), 1: tensor(0.8049), 2: tensor(0.7007), 3: tensor(0.7361)}
self.subclass_prob_thresholds:  {0: tensor(0.7046), 1: tensor(0.2737), 2: tensor(0.2267), 3: tensor(0.2952), 4: tensor(0.8467), 5: tensor(0.4021), 6: tensor(0.5752), 7: tensor(0.4109), 8: tensor(0.6237), 9: tensor(0.2962), 10: tensor(0.6525), 11: tensor(0.8798), 12: tensor(0.3142), 13: tensor(0.6484), 14: tensor(0.5606), 15: tensor(0.3094), 16: tensor(0.4975), 17: tensor(0.2873), 18: tensor(0.7337), 19: tensor(0.7372), 20: tensor(0.1669), 21: tensor(0.1916), 22: tensor(0.2108), 23: tensor(0.3952), 24: tensor(0.5464), 25: tensor(0.5706), 26: tensor(0.3952), 27: tensor(0.3783), 28: tensor(0.6444), 29: tensor(0.5909), 30: tensor(0.2308), 31: tensor(0.6067), 32: tensor(0.3952), 33: tensor(0.3021), 34: tensor(0.3738), 35: tensor(0.3208), 36: tensor(0.4871), 37: tensor(0.3062), 38: tensor(0.0973), 39: tensor(0.3160), 40: tensor(0.3172), 41: tensor(0.3952), 42: tensor(0.3135), 43: te

'\nThis simple baseline scores the following test accuracy\n\nSuperclass Accuracy\nOverall: 43.83 %\nSeen: 61.11 %\nUnseen: 0.00 %\n\nSubclass Accuracy\nOverall: 2.03 %\nSeen: 9.56 %\nUnseen: 0.00 %\n'

In [31]:
predictions

Unnamed: 0,image,superclass_index,subclass_index
0,0.jpg,1,85
1,1.jpg,0,80
2,2.jpg,2,34
3,3.jpg,2,43
4,4.jpg,1,46
...,...,...,...
12372,12372.jpg,0,87
12373,12373.jpg,2,15
12374,12374.jpg,2,44
12375,12375.jpg,2,33


In [32]:
predictions[predictions['superclass_index']==0]

Unnamed: 0,image,superclass_index,subclass_index
1,1.jpg,0,80
5,5.jpg,0,87
6,6.jpg,0,87
11,11.jpg,0,20
12,12.jpg,0,27
...,...,...,...
12360,12360.jpg,0,87
12361,12361.jpg,0,14
12362,12362.jpg,0,40
12367,12367.jpg,0,87


In [33]:
predictions['superclass_index'].value_counts()

0    3897
2    3410
1    3145
3    1925
Name: superclass_index, dtype: int64

In [34]:
predictions['subclass_index'].value_counts()

87    3800
44     283
46     280
17     232
35     212
      ... 
19      30
25      25
61      24
23      11
48       4
Name: subclass_index, Length: 88, dtype: int64

In [35]:
predictions.to_csv('probabilistic_superandsub_test_predictions_mobilenet.csv', index=False)