In [1]:
# citation: https://drive.google.com/drive/folders/1idfa8y7esf7usGo7SSxsH4iKBECEPFNr?usp=share_link 

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import os
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision

from torch.utils.data import Dataset, DataLoader, BatchSampler, random_split
from torchvision import transforms
from PIL import Image

In [2]:
# Create Dataset class for multilabel classification
class MultiClassImageDataset(Dataset):
    def __init__(self, ann_df, super_map_df, sub_map_df, img_dir, transform=None):
        self.ann_df = ann_df
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.ann_df)

    def __getitem__(self, idx):
        img_name = self.ann_df['image'][idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        super_idx = self.ann_df['superclass_index'][idx]
        super_label = self.super_map_df['class'][super_idx]

        sub_idx = self.ann_df['subclass_index'][idx]
        sub_label = self.sub_map_df['class'][sub_idx]

        if self.transform:
            image = self.transform(image)

        return image, super_idx, super_label, sub_idx, sub_label

class MultiClassImageTestDataset(Dataset):
    def __init__(self, super_map_df, sub_map_df, img_dir, transform=None):
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self): # Count files in img_dir
        return len([fname for fname in os.listdir(self.img_dir)])

    def __getitem__(self, idx):
        img_name = str(idx) + '.jpg'
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, img_name

In [32]:
train_ann_df = pd.read_csv('Released_Data/train_data.csv')
super_map_df = pd.read_csv('Released_Data/superclass_mapping.csv')
sub_map_df = pd.read_csv('Released_Data/subclass_mapping.csv')

train_img_dir = 'Released_Data/train_shuffle'
test_img_dir = 'Released_Data/test_shuffle'

image_preprocessing = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0), std=(1)),
])

# Create train and val split
train_dataset = MultiClassImageDataset(train_ann_df, super_map_df, sub_map_df, train_img_dir, transform=image_preprocessing)
train_dataset, val_dataset = random_split(train_dataset, [0.9, 0.1])

# Create test dataset
test_dataset = MultiClassImageTestDataset(super_map_df, sub_map_df, test_img_dir, transform=image_preprocessing)

# Create dataloaders
batch_size = 64
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

val_loader = DataLoader(val_dataset,
                        batch_size=batch_size,
                        shuffle=True)

test_loader = DataLoader(test_dataset,
                         batch_size=1,
                         shuffle=False)

In [45]:
# Simple CNN

import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.block1 = nn.Sequential(
                        nn.Conv2d(3, 32, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(32),
                        nn.Conv2d(32, 32, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(32),
                        nn.Conv2d(32, 32, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(32),
                        nn.MaxPool2d(2, 2)
                      )

        self.block2 = nn.Sequential(
                        nn.Conv2d(32, 64, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(64),
                        nn.Conv2d(64, 64, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(64),
                        nn.Conv2d(64, 64, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(64),
                        nn.MaxPool2d(2, 2)
                      )

        self.block3 = nn.Sequential(
                        nn.Conv2d(64, 128, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(128),
                        nn.Conv2d(128, 128, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(128),
                        nn.Conv2d(128, 128, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(128),
                        nn.MaxPool2d(2, 2)
                      )

        self.fc1 = nn.Linear(4*4*128, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3a = nn.Linear(128, 4)
        self.fc3b = nn.Linear(128, 88)

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = torch.flatten(x, 1) # flatten all dimensions except batch

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        super_out = self.fc3a(x)
        sub_out = self.fc3b(x)
        return super_out, sub_out

class Trainer():
    def __init__(self, model, criterion, optimizer, train_loader, val_loader, test_loader=None, device='cpu'):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.device = device
        self.max_super_prob_all = []
        self.max_sub_prob_all = []

    def train_epoch(self):
        running_loss = 0.0
        for i, data in enumerate(self.train_loader):
            inputs, super_labels, sub_labels = data[0].to(self.device), data[1].to(self.device), data[3].to(self.device)

            self.optimizer.zero_grad()
            super_outputs, sub_outputs = self.model(inputs)
            loss = self.criterion(super_outputs, super_labels) + self.criterion(sub_outputs, sub_labels)
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item()

        print(f'Training loss: {running_loss/i:.3f}')

    def validate_epoch(self):
        super_correct = 0
        sub_correct = 0
        total = 0
        running_loss = 0.0
        with torch.no_grad():
            for i, data in enumerate(self.val_loader):
                inputs, super_labels, sub_labels = data[0].to(self.device), data[1].to(self.device), data[3].to(self.device)

                super_outputs, sub_outputs = self.model(inputs)
                loss = self.criterion(super_outputs, super_labels) + self.criterion(sub_outputs, sub_labels)

                # Apply softmax to get probabilities
                super_probs = F.softmax(super_outputs, dim=1)
                sub_probs = F.softmax(sub_outputs, dim=1)

                # Get maximum probability values and corresponding predicted classes
                max_super_prob, super_predicted = torch.max(super_probs, 1)
                max_sub_prob, sub_predicted = torch.max(sub_probs, 1)

                # max_super_prob, super_predicted = torch.max(super_outputs.data, 1)
                # max_sub_prob, sub_predicted = torch.max(sub_outputs.data, 1)

                print('max_super_prob:', max_super_prob)
                print('max_sub_prob:', max_sub_prob)

                self.max_super_prob_all.append(max_super_prob)
                self.max_sub_prob_all.append(max_sub_prob)

                total += super_labels.size(0)
                super_correct += (super_predicted == super_labels).sum().item()
                sub_correct += (sub_predicted == sub_labels).sum().item()
                running_loss += loss.item()

        print(f'Validation loss: {running_loss/i:.3f}')
        print(f'Validation superclass acc: {100 * super_correct / total:.2f} %')
        print(f'Validation subclass acccc: {100 * sub_correct / total:.2f} %')

    def test(self, save_to_csv=False, return_predictions=False):
        if not self.test_loader:
            raise NotImplementedError('test_loader not specified')

        # Evaluate on test set, in this simple demo no special care is taken for novel/unseen classes
        test_predictions = {'image': [], 'superclass_index': [], 'subclass_index': []}
        with torch.no_grad():
            for i, data in enumerate(self.test_loader):
                inputs, img_name = data[0].to(self.device), data[1]

                super_outputs, sub_outputs = self.model(inputs)
                # Commented is the old method
                # _, super_predicted = torch.max(super_outputs.data, 1)
                # _, sub_predicted = torch.max(sub_outputs.data, 1)

                 # Apply softmax to get probabilities
                super_probs = F.softmax(super_outputs, dim=1)
                sub_probs = F.softmax(sub_outputs, dim=1)

                # is_novel_super = max_prob < threshold

                # Get maximum probability values and corresponding predicted classes
                max_super_prob, super_predicted = torch.max(super_probs, 1)
                max_sub_prob, sub_predicted = torch.max(sub_probs, 1)

                super_threshold = 0.4 # 0.4 gave 0.41053 --> need to drop this to 0.05 or 0.1 and retest
                sub_threshold = 0.2 # because the mean is around 0.6 and the mean is much smaller

                sub_predicted[max_sub_prob < sub_threshold] = 87
                super_predicted[max_super_prob < super_threshold] = 3

                test_predictions['image'].append(img_name[0])
                test_predictions['superclass_index'].append(super_predicted.item())
                test_predictions['subclass_index'].append(sub_predicted.item())

        test_predictions = pd.DataFrame(data=test_predictions)

        if save_to_csv:
            test_predictions.to_csv('example_test_predictions.csv', index=False)

        if return_predictions:
            return test_predictions

    def max_probs_all(self):
        return self.max_super_prob_all, self.max_sub_prob_all


In [46]:
# Init model and trainer
device = 'cuda'
# model = CNN().to(device)
model = CNN().to('cpu')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
trainer = Trainer(model, criterion, optimizer, train_loader, val_loader, test_loader)

In [50]:
# Training loop
for epoch in range(2):
    print(f'Epoch {epoch+1}')
    trainer.train_epoch()
    trainer.validate_epoch()
    print('')

print('Finished Trainingg')

Epoch 1
Training loss: 3.040
max_super_prob: tensor([0.6657, 0.5631, 0.9895, 0.9146, 0.9977, 0.9994, 0.9912, 0.6462, 0.9396,
        0.6611, 0.9969, 0.9974, 0.9586, 0.9931, 0.9918, 0.8256, 0.9997, 0.9986,
        0.9629, 0.9859, 0.7634, 0.4991, 0.9996, 0.5288, 0.9835, 0.9063, 0.6987,
        0.5091, 0.9290, 0.6756, 0.8347, 0.9710, 0.9486, 0.8479, 0.9653, 0.9583,
        0.9978, 0.7137, 0.9914, 0.7846, 0.9557, 0.8093, 0.9989, 0.9869, 0.9865,
        0.9878, 0.9996, 0.9525, 0.9491, 0.9905, 0.9966, 0.9923, 0.6046, 0.9564,
        0.8670, 0.9558, 0.8502, 0.9185, 0.8845, 0.5025, 0.8239, 0.9885, 0.8152,
        0.9972])
max_sub_prob: tensor([0.2132, 0.1227, 0.3742, 0.6815, 0.3383, 0.7113, 0.2480, 0.1520, 0.2994,
        0.3900, 0.1821, 0.3627, 0.2212, 0.8299, 0.2037, 0.2979, 0.4237, 0.4560,
        0.1488, 0.6197, 0.3022, 0.1017, 0.6158, 0.2608, 0.1957, 0.4277, 0.1306,
        0.2793, 0.1258, 0.1772, 0.7694, 0.8120, 0.1469, 0.1340, 0.4147, 0.0888,
        0.3889, 0.1585, 0.4540, 0.1043, 0.31

In [51]:
return_val = trainer.test(save_to_csv=True, return_predictions=True)

'''
max_super_prob_all, max_sub_prob_all = trainer.max_probs_all()
# print(max_super_prob_all)

max_super_prob_all_flat = [value.item() for tensor in max_super_prob_all for value in tensor.flatten()]


mean_value = np.mean(max_super_prob_all_flat)
median_value = np.median(max_super_prob_all_flat)
std_deviation = np.std(max_super_prob_all_flat)
max_value = np.max(max_super_prob_all_flat)
min_value = np.min(max_super_prob_all_flat)


# For super: 
# mean_value: 0.9514841495482604
# median_value: 0.9995446801185608
# std_deviation: 0.10958402783452965
# max_value: 1.0
# min_value: 0.36131414771080017

super_threshold = 0.4
sub_threshold = 0.2 # because the mean is around 0.6 and the mean is much smaller

max_sub_prob_all_flat = [value.item() for tensor in max_sub_prob_all for value in tensor.flatten()]


mean_value = np.mean(max_sub_prob_all_flat)
median_value = np.median(max_sub_prob_all_flat)
std_deviation = np.std(max_sub_prob_all_flat)
max_value = np.max(max_sub_prob_all_flat)
min_value = np.min(max_sub_prob_all_flat)

print('mean_value:', mean_value)
print('median_value:', median_value)
print('std_deviation:', std_deviation)
print('max_value:', max_value)
print('min_value:', min_value)

# For sub: 
# mean_value: 0.6491095473568391
# median_value: 0.6703433692455292
# std_deviation: 0.2840718384765652
# max_value: 1.0
# min_value: 0.038913045078516006

'''

"\nmax_super_prob_all, max_sub_prob_all = trainer.max_probs_all()\n# print(max_super_prob_all)\n\nmax_super_prob_all_flat = [value.item() for tensor in max_super_prob_all for value in tensor.flatten()]\n\n\nmean_value = np.mean(max_super_prob_all_flat)\nmedian_value = np.median(max_super_prob_all_flat)\nstd_deviation = np.std(max_super_prob_all_flat)\nmax_value = np.max(max_super_prob_all_flat)\nmin_value = np.min(max_super_prob_all_flat)\n\n\n# For super: \n# mean_value: 0.9514841495482604\n# median_value: 0.9995446801185608\n# std_deviation: 0.10958402783452965\n# max_value: 1.0\n# min_value: 0.36131414771080017\n\nsuper_threshold = 0.4\nsub_threshold = 0.2 # because the mean is around 0.6 and the mean is much smaller\n\nmax_sub_prob_all_flat = [value.item() for tensor in max_sub_prob_all for value in tensor.flatten()]\n\n\nmean_value = np.mean(max_sub_prob_all_flat)\nmedian_value = np.median(max_sub_prob_all_flat)\nstd_deviation = np.std(max_sub_prob_all_flat)\nmax_value = np.max(ma

'''
This simple baseline scores the following test accuracy

Superclass Accuracy
Overall: 43.83 %
Seen: 61.11 %
Unseen: 0.00 %

Subclass Accuracy
Overall: 2.03 %
Seen: 9.56 %
Unseen: 0.00 %
'''
