In [2]:
import os
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision

from torch.utils.data import Dataset, DataLoader, BatchSampler, random_split
from torchvision import transforms
from PIL import Image

# from google.colab import drive


In [3]:
# Create Dataset class for multilabel classification
class MultiClassImageDataset(Dataset):
    def __init__(self, ann_df, super_map_df, sub_map_df, img_dir, transform=None):
        self.ann_df = ann_df
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.ann_df)

    def __getitem__(self, idx):
        img_name = self.ann_df['image'][idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        super_idx = self.ann_df['superclass_index'][idx]
        super_label = self.super_map_df['class'][super_idx]

        sub_idx = self.ann_df['subclass_index'][idx]
        sub_label = self.sub_map_df['class'][sub_idx]

        if self.transform:
            image = self.transform(image)

        return image, super_idx, super_label, sub_idx, sub_label

# class MultiClassImageTestDataset(Dataset):
#     def __init__(self, super_map_df, sub_map_df, img_dir, transform=None):
#         self.super_map_df = super_map_df
#         self.sub_map_df = sub_map_df
#         self.img_dir = img_dir
#         self.transform = transform

#     def __len__(self): # Count files in img_dir
#         return len([fname for fname in os.listdir(self.img_dir)])

#     def __getitem__(self, idx):
#         img_name = str(idx) + '.jpg'
#         img_path = os.path.join(self.img_dir, img_name)
#         image = Image.open(img_path).convert('RGB')


#         super_idx = self.ann_df['superclass_index'][idx]
#         super_label = self.super_map_df['class'][super_idx]

#         sub_idx = self.ann_df['subclass_index'][idx]
#         sub_label = self.sub_map_df['class'][sub_idx]

#         if self.transform:
#             image = self.transform(image)

#         return image, super_idx, super_label, sub_idx, sub_label, img_name

class MultiClassImageTestDataset(Dataset):
    def __init__(self, super_map_df, sub_map_df, img_dir, transform=None):
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self): # Count files in img_dir
        return len([fname for fname in os.listdir(self.img_dir)])

    def __getitem__(self, idx):
        img_name = str(idx) + '.jpg'
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, img_name

In [4]:
# # load the data from google drive and load into dataframes

# drive.mount('/content/drive')

file_path = 'Released_Data/'
# sub_class = 'subclass_mapping.csv'
# super_class = 'superclass_mapping.csv'
# train_data = 'train_data.csv'

# # Now you can read or manipulate the file
# df_sub = pd.read_csv(file_path+'subclass_mapping.csv')
# df_subclass = pd.read_csv(file_path+sub_class)
# df_superclass = pd.read_csv(file_path+super_class)
# df_train = pd.read_csv(file_path+train_data)

# # Example: Print the content
# df_subclass.head()

train_ann_df = pd.read_csv(file_path + 'train_data.csv')
super_map_df = pd.read_csv(file_path + 'superclass_mapping.csv')
sub_map_df = pd.read_csv(file_path + 'subclass_mapping.csv')

train_img_dir = file_path + 'train_shuffle'
test_img_dir = file_path + 'test_shuffle'

# count_subclass_87 = len(train_ann_df[train_ann_df['subclass_index'] == 87]) by default, there are 0 novel classes in training, so we should create our own

# all classes with subclass 45, 57, and 29, 78 -> should get subclass label of 87 (novel)

# target_classes = [45, 57, 29, 78]

# Update the subclass_index to 87 for the specified classes
# train_ann_df.loc[train_ann_df['subclass_index'].isin(target_classes), 'subclass_index'] = 87


# image_preprocessing = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize(mean=(0), std=(1)),
# ])


In [5]:
# perform data manipulation: rotation, swapping, etc so that we can increase the size of the data set

# Suggestion:
# You should aim to train a generalizable model with all the techniques we have discussed
# so far (e.g., data augmentation, weight decay, etc.) Other tricks may include potentially
# building your local validation set for testing the model's generalization ability.

# Data augmentation and normalization
from torchvision import transforms

# # Training data transformations
# transform_train = transforms.Compose([
#     transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # Random crop with scaling
#     transforms.RandomHorizontalFlip(),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
# ])

# # Testing data transformations
# transform_test = transforms.Compose([
#     transforms.Resize(224),  # Resize to 224x224
#     transforms.CenterCrop(224),  # Center crop to maintain the aspect ratio
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
# ])


# Training data transformations
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(64),  # Random crop with scaling
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Testing data transformations
transform_test = transforms.Compose([
    transforms.Resize(64),  # Resize to 224x224
    transforms.CenterCrop(64),  # Center crop to maintain the aspect ratio
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


# Apply data augmentation and normalization to datasets
train_dataset = MultiClassImageDataset(train_ann_df, super_map_df, sub_map_df, train_img_dir, transform=transform_train)
test_dataset = MultiClassImageTestDataset(super_map_df, sub_map_df, test_img_dir, transform=transform_test)


In [14]:
# account for unlabeled data ()

# https://cdn-uploads.piazza.com/paste/jcwopj0bkHVK/4e633d4b629ae6f0c6630bba588e0d783805da7fd03580939c93a350d54adde5/NNDL_Multi-label_Classification_Competition.pdf

In [6]:
# train_ann_df = pd.read_csv('train_data.csv')
# super_map_df = pd.read_csv('superclass_mapping.csv')
# sub_map_df = pd.read_csv('subclass_mapping.csv')

# train_img_dir = 'train_shuffle'
# test_img_dir = 'test_shuffle'

# image_preprocessing = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize(mean=(0), std=(1)),
# ])

# Create train and val split
# train_dataset = MultiClassImageDataset(train_ann_df, super_map_df, sub_map_df, train_img_dir, transform=image_preprocessing)
train_dataset, val_dataset = random_split(train_dataset, [0.9, 0.1])

# Create test dataset
# test_dataset = MultiClassImageTestDataset(super_map_df, sub_map_df, test_img_dir, transform=image_preprocessing)

# Create dataloaders
batch_size = 64
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

val_loader = DataLoader(val_dataset,
                        batch_size=batch_size,
                        shuffle=True)

test_loader = DataLoader(test_dataset,
                         batch_size=1,
                         shuffle=False)

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Residual Block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.downsample = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += self.downsample(identity)
        out = self.relu(out)

        return out

class ResNet(nn.Module):
    def __init__(self):
        super(ResNet, self).__init__()

        self.block1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            ResidualBlock(32, 32),
            ResidualBlock(32, 32),
            nn.MaxPool2d(2, 2)
        )

        self.block2 = nn.Sequential(
            ResidualBlock(32, 64, stride=2),
            ResidualBlock(64, 64),
            ResidualBlock(64, 64),
            nn.MaxPool2d(2, 2)
        )

        self.block3 = nn.Sequential(
            ResidualBlock(64, 128, stride=2),
            ResidualBlock(128, 128),
            ResidualBlock(128, 128),
            nn.MaxPool2d(2, 2)
        )

        # Dynamically calculate the input size for the first fully connected layer
        dummy_input = torch.randn(1, 3, 64, 64)
        dummy_output = self._forward_conv(dummy_input)
        self.fc1_input_size = dummy_output.view(dummy_output.size(0), -1).size(1)

        self.fc1 = nn.Linear(self.fc1_input_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3a = nn.Linear(128, 4)
        self.fc3b = nn.Linear(128, 88)

    def _forward_conv(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        return x

    def forward(self, x):
        x = self._forward_conv(x)
        x = torch.flatten(x, 1)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        super_out = self.fc3a(x)
        sub_out = self.fc3b(x)

        return super_out, sub_out


# Instantiate the model
resnet_model = ResNet()


In [23]:
# Simple CNN
class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.block1 = nn.Sequential(
                        nn.Conv2d(3, 32, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(32),
                        nn.Conv2d(32, 32, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(32),
                        nn.Conv2d(32, 32, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(32),
                        nn.MaxPool2d(2, 2)
                      )

        self.block2 = nn.Sequential(
                        nn.Conv2d(32, 64, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(64),
                        nn.Conv2d(64, 64, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(64),
                        nn.Conv2d(64, 64, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(64),
                        nn.MaxPool2d(2, 2)
                      )

        self.block3 = nn.Sequential(
                        nn.Conv2d(64, 128, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(128),
                        nn.Conv2d(128, 128, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(128),
                        nn.Conv2d(128, 128, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(128),
                        nn.MaxPool2d(2, 2)
                      )

        self.fc1 = nn.Linear(4*4*128, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3a = nn.Linear(128, 4)
        self.fc3b = nn.Linear(128, 88)

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = torch.flatten(x, 1) # flatten all dimensions except batch

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        super_out = self.fc3a(x)
        sub_out = self.fc3b(x)
        return super_out, sub_out

class Trainer():
    def __init__(self, model, criterion, optimizer, train_loader, val_loader, test_loader=None, device='cpu'):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.device = device

    def train_epoch(self):
        running_loss = 0.0
        for i, data in enumerate(self.train_loader):
            # print('1')
            inputs, super_labels, sub_labels = data[0].to(self.device), data[1].to(self.device), data[3].to(self.device)
            # print('2')
            self.optimizer.zero_grad()
            # print('3')
            super_outputs, sub_outputs = self.model(inputs)
            # print('4')
            # prob dist = [0.2, 0.05, ...]

            # threasholding, 
            loss = self.criterion(super_outputs, super_labels) + self.criterion(sub_outputs, sub_labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f'Training loss: {running_loss/i:.3f}')

    def validate_epoch(self):
        super_correct = 0
        sub_correct = 0
        total = 0
        running_loss = 0.0
        with torch.no_grad():
            for i, data in enumerate(self.val_loader):
                inputs, super_labels, sub_labels = data[0].to(self.device), data[1].to(self.device), data[3].to(self.device)

                super_outputs, sub_outputs = self.model(inputs)
                loss = self.criterion(super_outputs, super_labels) + self.criterion(sub_outputs, sub_labels)
                _, super_predicted = torch.max(super_outputs.data, 1)
                _, sub_predicted = torch.max(sub_outputs.data, 1)

                total += super_labels.size(0)
                super_correct += (super_predicted == super_labels).sum().item()
                sub_correct += (sub_predicted == sub_labels).sum().item()
                running_loss += loss.item()

        print(f'Validation loss: {running_loss/i:.3f}')
        print(f'Validation superclass acc: {100 * super_correct / total:.2f} %')
        print(f'Validation subclass acc: {100 * sub_correct / total:.2f} %')

    def test(self, save_to_csv=False, return_predictions=False):
        if not self.test_loader:
            raise NotImplementedError('test_loader not specified')

        # Evaluate on test set, in this simple demo no special care is taken for novel/unseen classes
        test_predictions = {'image': [], 'superclass_index': [], 'subclass_index': []}
        with torch.no_grad():
            for i, data in enumerate(self.test_loader):
                inputs, img_name = data[0].to(device), data[1]

                super_outputs, sub_outputs = self.model(inputs)
                _, super_predicted = torch.max(super_outputs.data, 1)
                _, sub_predicted = torch.max(sub_outputs.data, 1)

                test_predictions['image'].append(img_name[0])
                test_predictions['superclass_index'].append(super_predicted.item())
                test_predictions['subclass_index'].append(sub_predicted.item())

        test_predictions = pd.DataFrame(data=test_predictions)

        if save_to_csv:
            test_predictions.to_csv('example_test_predictions.csv', index=False)

        if return_predictions:
            return test_predictions

In [9]:
# Init model and trainer
device = 'cpu'
model = ResNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# trainer = Trainer(model, criterion, optimizer, train_loader, val_loader, test_loader)

new_trainer = Trainer(model, criterion, optimizer, train_loader, val_loader, test_loader)

In [15]:
# Training loop
for epoch in range(20):
    print(f'Epoch {epoch+1}')
    # torch.cuda.empty_cache()
    trainer.train_epoch()
    trainer.validate_epoch()
    print('')

print('Finished Training')

Epoch 1
Training loss: 2.175
Validation loss: 2.641
Validation superclass acc: 89.56 %
Validation subclass acc: 44.30 %

Epoch 2
Training loss: 2.160
Validation loss: 2.817
Validation superclass acc: 85.13 %
Validation subclass acc: 40.98 %

Epoch 3
Training loss: 2.061
Validation loss: 2.652
Validation superclass acc: 87.18 %
Validation subclass acc: 45.57 %

Epoch 4
Training loss: 2.083
Validation loss: 2.743
Validation superclass acc: 88.13 %
Validation subclass acc: 44.94 %

Epoch 5
Training loss: 2.064
Validation loss: 2.646
Validation superclass acc: 86.55 %
Validation subclass acc: 43.99 %

Epoch 6
Training loss: 2.028
Validation loss: 2.625
Validation superclass acc: 89.40 %
Validation subclass acc: 45.57 %

Epoch 7
Training loss: 1.975
Validation loss: 2.566
Validation superclass acc: 89.08 %
Validation subclass acc: 46.20 %

Epoch 8
Training loss: 1.940
Validation loss: 2.456
Validation superclass acc: 90.35 %
Validation subclass acc: 47.31 %

Epoch 9
Training loss: 1.847
Val

In [24]:
stored_trainer = trainer

In [22]:
trainer.test(save_to_csv=False, return_predictions=True)

'''
This simple baseline scores the following test accuracy

Superclass Accuracy
Overall: 43.83 % # 30%
Seen: 61.11 %
Unseen: 0.00 %

Subclass Accuracy
Overall: 2.03 % # above 7 target
Seen: 9.56 %
Unseen: 0.00 %
'''

test result
data [tensor([[[[ 1.5639,  1.3584,  0.9303,  ..., -0.3027,  0.9132,  1.5125],
          [ 1.5639,  1.3584,  0.9303,  ..., -0.3027,  0.9132,  1.5125],
          [ 1.5468,  1.3413,  0.9132,  ..., -0.2856,  0.9303,  1.5297],
          ...,
          [ 0.3481,  0.4508,  0.6221,  ..., -0.8849, -0.7822, -0.7479],
          [ 0.5364,  0.6049,  0.7762,  ..., -0.7650, -0.6623, -0.6281],
          [ 0.6221,  0.6906,  0.8447,  ..., -0.6965, -0.6109, -0.5767]],

         [[ 1.7283,  1.5007,  1.0630,  ..., -0.1800,  1.0630,  1.6933],
          [ 1.7283,  1.5007,  1.0455,  ..., -0.1800,  1.0630,  1.6933],
          [ 1.7108,  1.4832,  1.0280,  ..., -0.1625,  1.0805,  1.7108],
          ...,
          [ 0.4153,  0.5203,  0.6954,  ..., -1.0203, -0.9153, -0.8803],
          [ 0.6078,  0.6779,  0.8529,  ..., -0.8978, -0.7927, -0.7577],
          [ 0.6954,  0.7654,  0.9230,  ..., -0.8277, -0.7402, -0.7052]],

         [[ 1.9777,  1.7511,  1.2980,  ..., -0.0267,  1.2108,  1.8208],
          [ 

IndexError: list index out of range