In [1]:

# citation: https://medium.com/mlearning-ai/implementation-of-googlenet-on-keras-d9873aeed83c
# citation: https://drive.google.com/drive/folders/1idfa8y7esf7usGo7SSxsH4iKBECEPFNr?usp=share_link 


In [2]:
import os
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.models import googlenet

import torchvision

from torch.utils.data import Dataset, DataLoader, BatchSampler, random_split
from torchvision import transforms
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import seaborn as sns
from scipy.special import k1
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam

from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, AveragePooling2D, Flatten, GlobalAveragePooling2D, Dense, Dropout, concatenate

2023-12-18 15:23:41.123381: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-18 15:23:41.172496: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-18 15:23:41.172546: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-18 15:23:41.174126: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-18 15:23:41.182244: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-18 15:23:41.183390: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [3]:
# Mount Google Drive
# drive.mount('/content/drive')

In [4]:
# Create Dataset class for multilabel classification
class MultiClassImageDataset(Dataset):
    def __init__(self, ann_df, super_map_df, sub_map_df, img_dir, transform=None):
        self.ann_df = ann_df
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.ann_df)

    def __getitem__(self, idx):
        img_name = self.ann_df['image'][idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        super_idx = self.ann_df['superclass_index'][idx]
        super_label = self.super_map_df['class'][super_idx]

        sub_idx = self.ann_df['subclass_index'][idx]
        sub_label = self.sub_map_df['class'][sub_idx]

        if self.transform:
            image = self.transform(image)

        return image, super_idx, super_label, sub_idx, sub_label

class MultiClassImageTestDataset(Dataset):
    def __init__(self, super_map_df, sub_map_df, img_dir, transform=None):
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self): # Count files in img_dir
        return len([fname for fname in os.listdir(self.img_dir)])

    def __getitem__(self, idx):
        img_name = str(idx) + '.jpg'
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, img_name

In [5]:
train_ann_df = pd.read_csv('train_data.csv')
super_map_df = pd.read_csv('superclass_mapping.csv')
sub_map_df = pd.read_csv('subclass_mapping.csv')

train_img_dir = 'train_shuffle'
test_img_dir = 'test_shuffle'

image_preprocessing = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0), std=(1)),
])

# Create train and val split
train_dataset = MultiClassImageDataset(train_ann_df, super_map_df, sub_map_df, train_img_dir, transform=image_preprocessing)
train_dataset, val_dataset = random_split(train_dataset, [0.9, 0.1])

# Create test dataset
test_dataset = MultiClassImageTestDataset(super_map_df, sub_map_df, test_img_dir, transform=image_preprocessing)

# Create dataloaders
batch_size = 16
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

val_loader = DataLoader(val_dataset,
                        batch_size=batch_size,
                        shuffle=True)

test_loader = DataLoader(test_dataset,
                         batch_size=1,
                         shuffle=False)

In [6]:
class GoogLeNet(nn.Module):
    def __init__(self):
        super().__init__()

        # Initial Layers
        self.initial_layers = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),
            nn.Conv2d(64, 64, kernel_size=1, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(64, 192, kernel_size=3, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1)
        )

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.dropout = nn.Dropout(0.5)

        self.fc = nn.Linear(1024, 128)

        self.fc3a = nn.Linear(128, 4)
        self.fc3b = nn.Linear(128, 88)

    def inception_block(self, input_layer, f1, f2_conv1, f2_conv3, f3_conv1, f3_conv5, f4, x):
        # Inception blocks a
        self.inception_a = nn.Sequential(
            nn.Conv2d(input_layer, f1, kernel_size=1, padding='same').to(x.device),
            nn.ReLU()
        )

        # Inception blocks b
        self.inception_b = nn.Sequential(
            nn.Conv2d(input_layer, f2_conv1, kernel_size=1, padding='same').to(x.device),
            nn.ReLU(),
            nn.Conv2d(f2_conv1, f2_conv3, kernel_size=3, padding='same').to(x.device),
            nn.ReLU()
        )

        # Inception blocks c
        self.inception_c = nn.Sequential(
            nn.Conv2d(input_layer, f3_conv1, kernel_size=1, padding='same').to(x.device),
            nn.ReLU(),
            nn.Conv2d(f3_conv1, f3_conv5, kernel_size=5, padding='same').to(x.device),
            nn.ReLU()
        )

        # Inception blocks d
        self.inception_d = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(input_layer, f4, kernel_size=1, padding='same').to(x.device),
            nn.ReLU()
        )

        x_a = self.inception_a(x)
        x_b = self.inception_b(x)
        x_c = self.inception_c(x)
        x_d = self.inception_d(x)
        # print(x_a.shape, x_b.shape, x_c.shape, x_d.shape)

        inception_cat = torch.cat((x_a, x_b, x_c, x_d), dim=1) # change to -1?
        return inception_cat



    def forward(self, x):
        x = self.initial_layers(x)
        x = self.inception_block(192, 64, 96, 128, 16, 32, 32, x)
        x = self.inception_block(256, 128, 128, 192, 32, 96, 64, x)

        x = nn.MaxPool2d(kernel_size=2, stride=2)(x)

        x = self.inception_block(480, 192, 96, 208, 16, 48, 64, x)
        x = self.inception_block(512, 160, 112, 224, 24, 64, 64, x)
        x = self.inception_block(512, 128, 128, 256, 24, 64, 64, x)
        x = self.inception_block(512, 112, 144, 288, 32, 64, 64, x)
        x = self.inception_block(528, 256, 160, 320, 32, 128, 128, x)
        x = nn.MaxPool2d(kernel_size=3, stride=2)(x)
        x = self.inception_block(832, 256, 160, 320, 32, 128, 128, x)
        x = self.inception_block(832, 384, 192, 384, 48, 128, 128, x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)

        super_out = self.fc3a(x)
        sub_out = self.fc3b(x)
        return super_out, sub_out

class Trainer():
    def __init__(self, model, criterion, optimizer, train_loader, val_loader, test_loader=None, device='cpu'):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.device = device
        self.max_super_prob_all = []
        self.max_sub_prob_all = []

    def train_epoch(self):
        running_loss = 0.0
        for i, data in enumerate(self.train_loader):
            inputs, super_labels, sub_labels = data[0].to(device), data[1].to(device), data[3].to(device)

            self.optimizer.zero_grad()
            super_outputs, sub_outputs = self.model(inputs)
            loss = self.criterion(super_outputs, super_labels) + self.criterion(sub_outputs, sub_labels)
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item()

        print(f'Training loss: {running_loss/i:.3f}')

    def validate_epoch(self):
        super_correct = 0
        sub_correct = 0
        total = 0
        running_loss = 0.0
        with torch.no_grad():
            for i, data in enumerate(self.val_loader):
                inputs, super_labels, sub_labels = data[0].to(device), data[1].to(device), data[3].to(device)

                super_outputs, sub_outputs = self.model(inputs)
                loss = self.criterion(super_outputs, super_labels) + self.criterion(sub_outputs, sub_labels)

                # Apply softmax to get probabilities
                super_probs = F.softmax(super_outputs, dim=1)
                sub_probs = F.softmax(sub_outputs, dim=1)

                # Get maximum probability values and corresponding predicted classes
                max_super_prob, super_predicted = torch.max(super_probs, 1)
                max_sub_prob, sub_predicted = torch.max(sub_probs, 1)

                # max_super_prob, super_predicted = torch.max(super_outputs.data, 1)
                # max_sub_prob, sub_predicted = torch.max(sub_outputs.data, 1)

                print('max_super_prob:', max_super_prob)
                print('max_sub_prob:', max_sub_prob)

                self.max_super_prob_all.append(max_super_prob)
                self.max_sub_prob_all.append(max_sub_prob)

                total += super_labels.size(0)
                super_correct += (super_predicted == super_labels).sum().item()
                sub_correct += (sub_predicted == sub_labels).sum().item()
                running_loss += loss.item()

        print(f'Validation loss: {running_loss/i:.3f}')
        print(f'Validation superclass acc: {100 * super_correct / total:.2f} %')
        print(f'Validation subclass acccc: {100 * sub_correct / total:.2f} %')

    def test(self, save_to_csv=False, return_predictions=False):
        if not self.test_loader:
            raise NotImplementedError('test_loader not specified')

        # Evaluate on test set, in this simple demo no special care is taken for novel/unseen classes
        test_predictions = {'image': [], 'superclass_index': [], 'subclass_index': []}
        with torch.no_grad():
            for i, data in enumerate(self.test_loader):
                inputs, img_name = data[0].to(device), data[1]

                super_outputs, sub_outputs = self.model(inputs)
                # Commented is the old method
                # _, super_predicted = torch.max(super_outputs.data, 1)
                # _, sub_predicted = torch.max(sub_outputs.data, 1)

                 # Apply softmax to get probabilities
                super_probs = F.softmax(super_outputs, dim=1)
                sub_probs = F.softmax(sub_outputs, dim=1)

                # is_novel_super = max_prob < threshold

                # Get maximum probability values and corresponding predicted classes
                max_super_prob, super_predicted = torch.max(super_probs, 1)
                max_sub_prob, sub_predicted = torch.max(sub_probs, 1)

                super_threshold = 0.3 # 0.4 gave 0.41053 --> need to drop this to 0.05 or 0.1 and retest
                sub_threshold = 0.4 # because the mean is around 0.6 and the mean is much smaller

                sub_predicted[max_sub_prob < sub_threshold] = 87
                super_predicted[max_super_prob < super_threshold] = 3

                test_predictions['image'].append(img_name[0])
                test_predictions['superclass_index'].append(super_predicted.item())
                test_predictions['subclass_index'].append(sub_predicted.item())

        test_predictions = pd.DataFrame(data=test_predictions)

        if save_to_csv:
            test_predictions.to_csv('example_test_predictions.csv', index=False)

        if return_predictions:
            return test_predictions

    def max_probs_all(self):
        return self.max_super_prob_all, self.max_sub_prob_all

In [7]:
google_net = torchvision.models.inception_v3(pretrained=True)



In [8]:
# Init model and trainer
device = 'cuda'
model = GoogLeNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
trainer = Trainer(model, criterion, optimizer, train_loader, val_loader, test_loader)

In [9]:
# Training loop
for epoch in range(15):
    print(f'Epoch {epoch+1}')
    trainer.train_epoch()
    trainer.validate_epoch()
    print('')

print('Finished Training')

Epoch 1
Training loss: 5.743
max_super_prob: tensor([0.3553, 0.3611, 0.3586, 0.3600, 0.3561, 0.3586, 0.3551, 0.3607, 0.3615,
        0.3558, 0.3668, 0.3581, 0.3566, 0.3586, 0.3566, 0.3555],
       device='cuda:0')
max_sub_prob: tensor([0.0139, 0.0141, 0.0139, 0.0141, 0.0139, 0.0141, 0.0140, 0.0140, 0.0142,
        0.0139, 0.0143, 0.0140, 0.0140, 0.0141, 0.0140, 0.0139],
       device='cuda:0')
max_super_prob: tensor([0.3462, 0.3516, 0.3492, 0.3537, 0.3526, 0.3462, 0.3505, 0.3516, 0.3543,
        0.3615, 0.3478, 0.3499, 0.3526, 0.3548, 0.3529, 0.3490],
       device='cuda:0')
max_sub_prob: tensor([0.0139, 0.0140, 0.0139, 0.0140, 0.0138, 0.0138, 0.0138, 0.0140, 0.0140,
        0.0141, 0.0139, 0.0139, 0.0139, 0.0141, 0.0140, 0.0138],
       device='cuda:0')
max_super_prob: tensor([0.3534, 0.3628, 0.3629, 0.3613, 0.3590, 0.3638, 0.3533, 0.3510, 0.3576,
        0.3554, 0.3565, 0.3600, 0.3600, 0.3516, 0.3547, 0.3496],
       device='cuda:0')
max_sub_prob: tensor([0.0139, 0.0142, 0.0141, 0.01

In [10]:
trainer.test(save_to_csv=True, return_predictions=True)

'''
This simple baseline scores the following test accuracy

Superclass Accuracy
Overall: 43.83 %
Seen: 61.11 %
Unseen: 0.00 %

Subclass Accuracy
Overall: 2.03 %
Seen: 9.56 %
Unseen: 0.00 %
'''

'\nThis simple baseline scores the following test accuracy\n\nSuperclass Accuracy\nOverall: 43.83 %\nSeen: 61.11 %\nUnseen: 0.00 %\n\nSubclass Accuracy\nOverall: 2.03 %\nSeen: 9.56 %\nUnseen: 0.00 %\n'

In [11]:
# Read the CSV file into a DataFrame
file_path = 'example_test_predictions.csv'  # Replace with the actual file path
df = pd.read_csv(file_path)

# Display the original DataFrame
print("Original DataFrame:")
print(df)

# Select two columns
selected_columns = df[['image', 'subclass_index']]

# Rename one of the columns
selected_columns = selected_columns.rename(columns={'image': 'ID'})
selected_columns = selected_columns.rename(columns={'subclass_index': 'Target'})

# Save the modified DataFrame to a new CSV file
output_file_path = 'sub_test.csv'  # Replace with the desired output file path
selected_columns.to_csv(output_file_path, index=False)

# Select two columns
selected_column = df[['image', 'superclass_index']]

# Rename one of the columns
selected_column = selected_column.rename(columns={'image': 'ID'})
selected_column = selected_column.rename(columns={'superclass_index': 'Target'})
print(selected_columns)

# Save the modified DataFrame to a new CSV file
output_file_path = 'super_test.csv'  # Replace with the desired output file path
selected_column.to_csv(output_file_path, index=False)

Original DataFrame:
           image  superclass_index  subclass_index
0          0.jpg                 2              87
1          1.jpg                 2              87
2          2.jpg                 2              87
3          3.jpg                 2              87
4          4.jpg                 2              87
...          ...               ...             ...
12372  12372.jpg                 2              87
12373  12373.jpg                 2              87
12374  12374.jpg                 2              87
12375  12375.jpg                 2              87
12376  12376.jpg                 2              87

[12377 rows x 3 columns]
              ID  Target
0          0.jpg      87
1          1.jpg      87
2          2.jpg      87
3          3.jpg      87
4          4.jpg      87
...          ...     ...
12372  12372.jpg      87
12373  12373.jpg      87
12374  12374.jpg      87
12375  12375.jpg      87
12376  12376.jpg      87

[12377 rows x 2 columns]
