In [1]:
# citation: https://drive.google.com/drive/folders/1idfa8y7esf7usGo7SSxsH4iKBECEPFNr?usp=share_linkÂ 

In [None]:
# from google.colab import drive
import os
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import itertools
import torchvision
from PIL import Image



from torch.utils.data import Dataset, DataLoader, BatchSampler, random_split
from torchvision import transforms
from torch.utils.data import Subset
from PIL import Image

In [2]:
# # Mount Google Drive
# drive.mount('/content/drive')

In [3]:
# Create Dataset class for multilabel classification
class MultiClassImageDataset(Dataset):
    def __init__(self, ann_df, super_map_df, sub_map_df, img_dir, transform=None):
        self.ann_df = ann_df
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.ann_df)

    def __getitem__(self, idx):
        img_name = self.ann_df['image'][idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        super_idx = self.ann_df['superclass_index'][idx]
        super_label = self.super_map_df['class'][super_idx]

        sub_idx = self.ann_df['subclass_index'][idx]
        sub_label = self.sub_map_df['class'][sub_idx]

        if self.transform:
            image = self.transform(image)

        return image, super_idx, super_label, sub_idx, sub_label

class MultiClassImageTestDataset(Dataset):
    def __init__(self, super_map_df, sub_map_df, img_dir, transform=None):
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self): # Count files in img_dir
        return len([fname for fname in os.listdir(self.img_dir)])

    def __getitem__(self, idx):
        img_name = str(idx) + '.jpg'
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, img_name

In [4]:
train_ann_df = pd.read_csv('train_data.csv')
super_map_df = pd.read_csv('superclass_mapping.csv')
sub_map_df = pd.read_csv('subclass_mapping.csv')

train_img_dir = 'train_shuffle'
test_img_dir = 'test_shuffle'

image_preprocessing = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0), std=(1)),
])

# Create train and val split
train_dataset = MultiClassImageDataset(train_ann_df, super_map_df, sub_map_df, train_img_dir, transform=image_preprocessing)
train_dataset, val_dataset = random_split(train_dataset, [0.9, 0.1])

# Create test dataset
test_dataset = MultiClassImageTestDataset(super_map_df, sub_map_df, test_img_dir, transform=image_preprocessing)

# Create dataloaders
batch_size = 8
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

val_loader = DataLoader(val_dataset,
                        batch_size=batch_size,
                        shuffle=True)

test_loader = DataLoader(test_dataset,
                         batch_size=1,
                         shuffle=False)

In [5]:
# CNN
class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.block_1 = nn.Sequential(
                        nn.Conv2d(3, 32, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(32),
                        nn.Conv2d(32, 32, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(32),
                        nn.Conv2d(32, 64, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.4),
                        nn.Conv2d(64, 64, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(64),
                        nn.Conv2d(64, 128, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(128),
                        nn.Conv2d(128, 128, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(128),
             nn.MaxPool2d(2, 2),
            nn.Dropout(0.4),
                        nn.Conv2d(128, 256, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(256),
                        nn.Conv2d(256, 256, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(256),
                        nn.Conv2d(256, 256, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(256),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.4),
                        nn.Conv2d(256, 256, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(256),
                        nn.Conv2d(256, 256, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(256),
                        nn.Conv2d(256, 512, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(512),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.4),
                      )
        self.fc1 = nn.Linear(2048, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3a = nn.Linear(128, 4)
        self.fc3b = nn.Linear(128, 88)


    def forward(self, x):
        x = self.block_1(x)
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        super_out = self.fc3a(x)
        sub_out = self.fc3b(x)
        return super_out, sub_out

class Trainer():
    def __init__(self, model, criterion, optimizer, train_loader, val_loader, test_loader=None, device='cpu'):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.device = device
        self.max_super_prob_all = []
        self.max_sub_prob_all = []

    def train_epoch(self):
        running_loss = 0.0
        for i, data in enumerate(self.train_loader):
            inputs, super_labels, sub_labels = data[0].to(device), data[1].to(device), data[3].to(device)

            self.optimizer.zero_grad()
            super_outputs, sub_outputs = self.model(inputs)
            loss = self.criterion(super_outputs, super_labels) + self.criterion(sub_outputs, sub_labels)
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item()

        print(f'Training loss: {running_loss/i:.3f}')

    def validate_epoch(self):
        super_correct = 0
        sub_correct = 0
        total = 0
        running_loss = 0.0
        with torch.no_grad():
            for i, data in enumerate(self.val_loader):
                inputs, super_labels, sub_labels = data[0].to(device), data[1].to(device), data[3].to(device)

                super_outputs, sub_outputs = self.model(inputs)
                loss = self.criterion(super_outputs, super_labels) + self.criterion(sub_outputs, sub_labels)

                # Apply softmax to get probabilities
                super_probs = F.softmax(super_outputs, dim=1)
                sub_probs = F.softmax(sub_outputs, dim=1)

                # Get maximum probability values and corresponding predicted classes
                max_super_prob, super_predicted = torch.max(super_probs, 1)
                max_sub_prob, sub_predicted = torch.max(sub_probs, 1)

                # max_super_prob, super_predicted = torch.max(super_outputs.data, 1)
                # max_sub_prob, sub_predicted = torch.max(sub_outputs.data, 1)

                print('max_super_prob:', max_super_prob)
                print('max_sub_prob:', max_sub_prob)

                self.max_super_prob_all.append(max_super_prob)
                self.max_sub_prob_all.append(max_sub_prob)

                total += super_labels.size(0)
                super_correct += (super_predicted == super_labels).sum().item()
                sub_correct += (sub_predicted == sub_labels).sum().item()
                running_loss += loss.item()

        print(f'Validation loss: {running_loss/i:.3f}')
        print(f'Validation superclass acc: {100 * super_correct / total:.2f} %')
        print(f'Validation subclass acccc: {100 * sub_correct / total:.2f} %')

    def test(self, save_to_csv=False, return_predictions=False):
        if not self.test_loader:
            raise NotImplementedError('test_loader not specified')

        # Evaluate on test set, in this simple demo no special care is taken for novel/unseen classes
        test_predictions = {'image': [], 'superclass_index': [], 'subclass_index': []}
        with torch.no_grad():
            for i, data in enumerate(self.test_loader):
                inputs, img_name = data[0].to(device), data[1]

                super_outputs, sub_outputs = self.model(inputs)
                # Commented is the old method
                # _, super_predicted = torch.max(super_outputs.data, 1)
                # _, sub_predicted = torch.max(sub_outputs.data, 1)

                 # Apply softmax to get probabilities
                super_probs = F.softmax(super_outputs, dim=1)
                sub_probs = F.softmax(sub_outputs, dim=1)

                # is_novel_super = max_prob < threshold

                # Get maximum probability values and corresponding predicted classes
                max_super_prob, super_predicted = torch.max(super_probs, 1)
                max_sub_prob, sub_predicted = torch.max(sub_probs, 1)

                super_threshold = 0.3 # 0.4 gave 0.41053 --> need to drop this to 0.05 or 0.1 and retest
                sub_threshold = 0.4 # because the mean is around 0.6 and the mean is much smaller

                sub_predicted[max_sub_prob < sub_threshold] = 87
                super_predicted[max_super_prob < super_threshold] = 3

                test_predictions['image'].append(img_name[0])
                test_predictions['superclass_index'].append(super_predicted.item())
                test_predictions['subclass_index'].append(sub_predicted.item())

        test_predictions = pd.DataFrame(data=test_predictions)

        if save_to_csv:
            test_predictions.to_csv('example_test_predictions.csv', index=False)

        if return_predictions:
            return test_predictions

    def max_probs_all(self):
        return self.max_super_prob_all, self.max_sub_prob_all


In [6]:
# Init model and trainer
device = 'cuda'
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-2)
trainer = Trainer(model, criterion, optimizer, train_loader, val_loader, test_loader)

In [7]:
# Training loop
for epoch in range(50):
    print(f'Epoch {epoch+1}')
    trainer.train_epoch()
    trainer.validate_epoch()
    print('')

print('Finished Training')

Epoch 1
Training loss: 4.792
max_super_prob: tensor([0.8417, 0.9125, 0.7970, 0.9648, 0.8019, 0.6711, 0.9360, 0.8962],
       device='cuda:0')
max_sub_prob: tensor([0.1319, 0.1614, 0.0589, 0.1174, 0.1138, 0.1292, 0.0856, 0.0564],
       device='cuda:0')
max_super_prob: tensor([0.8920, 0.9746, 0.4552, 0.8675, 0.9700, 0.9764, 0.6112, 0.6525],
       device='cuda:0')
max_sub_prob: tensor([0.0713, 0.1161, 0.0282, 0.1049, 0.1251, 0.0906, 0.0393, 0.1153],
       device='cuda:0')
max_super_prob: tensor([0.9222, 0.7704, 0.9618, 0.5957, 0.7589, 0.9856, 0.7771, 0.9733],
       device='cuda:0')
max_sub_prob: tensor([0.0806, 0.0732, 0.1196, 0.0295, 0.0511, 0.1106, 0.0533, 0.1093],
       device='cuda:0')
max_super_prob: tensor([0.7545, 0.9775, 0.4840, 0.9928, 0.8211, 0.9130, 0.8735, 0.9588],
       device='cuda:0')
max_sub_prob: tensor([0.0431, 0.1219, 0.0350, 0.1456, 0.0871, 0.0937, 0.0549, 0.1034],
       device='cuda:0')
max_super_prob: tensor([0.9294, 0.9646, 0.9682, 0.9655, 0.7801, 0.9558, 0.9

In [8]:
trainer.test(save_to_csv=True, return_predictions=True)

'''
This simple baseline scores the following test accuracy

Superclass Accuracy
Overall: 43.83 %
Seen: 61.11 %
Unseen: 0.00 %

Subclass Accuracy
Overall: 2.03 %
Seen: 9.56 %
Unseen: 0.00 %
'''

'\nThis simple baseline scores the following test accuracy\n\nSuperclass Accuracy\nOverall: 43.83 %\nSeen: 61.11 %\nUnseen: 0.00 %\n\nSubclass Accuracy\nOverall: 2.03 %\nSeen: 9.56 %\nUnseen: 0.00 %\n'

In [9]:
# Read the CSV file into a DataFrame
file_path = 'example_test_predictions.csv'  # Replace with the actual file path
df = pd.read_csv(file_path)

# Display the original DataFrame
print("Original DataFrame:")
print(df)

# Select two columns
selected_columns = df[['image', 'subclass_index']]

# Rename one of the columns
selected_columns = selected_columns.rename(columns={'image': 'ID'})
selected_columns = selected_columns.rename(columns={'subclass_index': 'Target'})

# Save the modified DataFrame to a new CSV file
output_file_path = 'sub_test.csv'  # Replace with the desired output file path
selected_columns.to_csv(output_file_path, index=False)

# Select two columns
selected_column = df[['image', 'superclass_index']]

# Rename one of the columns
selected_column = selected_column.rename(columns={'image': 'ID'})
selected_column = selected_column.rename(columns={'superclass_index': 'Target'})
print(selected_columns)

# Save the modified DataFrame to a new CSV file
output_file_path = 'super_test.csv'  # Replace with the desired output file path
selected_column.to_csv(output_file_path, index=False)

Original DataFrame:
           image  superclass_index  subclass_index
0          0.jpg                 1               2
1          1.jpg                 0              60
2          2.jpg                 2              34
3          3.jpg                 0              75
4          4.jpg                 1              21
...          ...               ...             ...
12372  12372.jpg                 0               6
12373  12373.jpg                 2              15
12374  12374.jpg                 2               3
12375  12375.jpg                 0              86
12376  12376.jpg                 1              87

[12377 rows x 3 columns]
              ID  Target
0          0.jpg       2
1          1.jpg      60
2          2.jpg      34
3          3.jpg      75
4          4.jpg      21
...          ...     ...
12372  12372.jpg       6
12373  12373.jpg      15
12374  12374.jpg       3
12375  12375.jpg      86
12376  12376.jpg      87

[12377 rows x 2 columns]
