In [5]:
import os
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.models import googlenet

import torchvision

from torch.utils.data import Dataset, DataLoader, BatchSampler, random_split
from torchvision import transforms
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import seaborn as sns
from scipy.special import k1
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam

from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, AveragePooling2D, Flatten, GlobalAveragePooling2D, Dense, Dropout, concatenate

# from google.colab import drive

2023-12-11 22:16:45.973532: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-11 22:16:47.995131: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-11 22:16:47.995247: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-11 22:16:48.274166: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-11 22:16:48.970854: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-11 22:16:48.986045: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [6]:
# Mount Google Drive
# drive.mount('/content/drive')

# file_path = '/content/drive/MyDrive/Second Year [2023]/Fall Semester/NNDL/Project/data/Released_Data/'

# contents = os.listdir(file_path)

# # Print the contents
# for item in contents:
#     print(item)

In [7]:
# Create Dataset class for multilabel classification
class MultiClassImageDataset(Dataset):
    def __init__(self, ann_df, super_map_df, sub_map_df, img_dir, transform=None):
        self.ann_df = ann_df
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.ann_df)

    def __getitem__(self, idx):
        img_name = self.ann_df['image'][idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        super_idx = self.ann_df['superclass_index'][idx]
        super_label = self.super_map_df['class'][super_idx]

        sub_idx = self.ann_df['subclass_index'][idx]
        sub_label = self.sub_map_df['class'][sub_idx]

        if self.transform:
            image = self.transform(image)

        return image, super_idx, super_label, sub_idx, sub_label

class MultiClassImageTestDataset(Dataset):
    def __init__(self, super_map_df, sub_map_df, img_dir, transform=None):
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self): # Count files in img_dir
        return len([fname for fname in os.listdir(self.img_dir)])

    def __getitem__(self, idx):
        img_name = str(idx) + '.jpg'
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, img_name

In [8]:
train_ann_df = pd.read_csv('train_data.csv')
super_map_df = pd.read_csv('superclass_mapping.csv')
sub_map_df = pd.read_csv('subclass_mapping.csv')

train_img_dir = 'train_shuffle'
test_img_dir = 'test_shuffle'

In [9]:
count= len(train_ann_df) 
print(count)

6322


In [10]:
6322.jpg

SyntaxError: invalid imaginary literal (4028558996.py, line 1)

In [11]:
train_ann_df.tail()

Unnamed: 0,image,superclass_index,subclass_index
6317,6317.jpg,2,63
6318,6318.jpg,2,74
6319,6319.jpg,0,86
6320,6320.jpg,0,14
6321,6321.jpg,2,39


In [12]:
super_map_df.tail()

Unnamed: 0,index,class
0,0,bird
1,1,dog
2,2,reptile
3,3,novel


In [13]:
# train_ann_df = pd.read_csv('/content/drive/MyDrive/Released_Data/train_data.csv')
# super_map_df = pd.read_csv('/content/drive/MyDrive/Released_Data/superclass_mapping.csv')
# sub_map_df = pd.read_csv('/content/drive/MyDrive/Released_Data/subclass_mapping.csv')

# train_img_dir = '/content/drive/MyDrive/train_shuffle'
# test_img_dir = '/content/drive/MyDrive/test_shuffle'



image_preprocessing = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0), std=(1)),
])

# Create train and val split
train_dataset = MultiClassImageDataset(train_ann_df, super_map_df, sub_map_df, train_img_dir, transform=image_preprocessing)
train_dataset, val_dataset = random_split(train_dataset, [0.9, 0.1])

# Create test dataset
test_dataset = MultiClassImageTestDataset(super_map_df, sub_map_df, test_img_dir, transform=image_preprocessing)

# Create dataloaders
batch_size = 16
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

val_loader = DataLoader(val_dataset,
                        batch_size=batch_size,
                        shuffle=True)

test_loader = DataLoader(test_dataset,
                         batch_size=1,
                         shuffle=False)

In [154]:
class GoogLeNet(nn.Module):
    def __init__(self):
        super().__init__()

        # Initial Layers
        self.initial_layers = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),
            nn.Conv2d(64, 64, kernel_size=1, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(64, 192, kernel_size=3, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1)
        )
        
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.dropout = nn.Dropout(0.5)
        
        self.fc = nn.Linear(1024, 128)
        
        self.fc3a = nn.Linear(128, 4)
        self.fc3b = nn.Linear(128, 88)
        
    def inception_block(self, input_layer, f1, f2_conv1, f2_conv3, f3_conv1, f3_conv5, f4, x):
        # Inception blocks a
        self.inception_a = nn.Sequential(
            nn.Conv2d(input_layer, f1, kernel_size=1, padding='same').to(x.device),
            nn.ReLU()
        )
        
        # Inception blocks b
        self.inception_b = nn.Sequential(
            nn.Conv2d(input_layer, f2_conv1, kernel_size=1, padding='same').to(x.device),
            nn.ReLU(),
            nn.Conv2d(f2_conv1, f2_conv3, kernel_size=3, padding='same').to(x.device),
            nn.ReLU()
        )
        
        # Inception blocks c
        self.inception_c = nn.Sequential(
            nn.Conv2d(input_layer, f3_conv1, kernel_size=1, padding='same').to(x.device),
            nn.ReLU(),
            nn.Conv2d(f3_conv1, f3_conv5, kernel_size=5, padding='same').to(x.device),
            nn.ReLU()
        )
        
        # Inception blocks d
        self.inception_d = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(input_layer, f4, kernel_size=1, padding='same').to(x.device),
            nn.ReLU()
        )
        
        x_a = self.inception_a(x)
        x_b = self.inception_b(x)
        x_c = self.inception_c(x)
        x_d = self.inception_d(x)
        # print(x_a.shape, x_b.shape, x_c.shape, x_d.shape)
        
        inception_cat = torch.cat((x_a, x_b, x_c, x_d), dim=1) # change to -1?
        return inception_cat
    
    

    def forward(self, x):
        x = self.initial_layers(x)
        x = self.inception_block(192, 64, 96, 128, 16, 32, 32, x)
        x = self.inception_block(256, 128, 128, 192, 32, 96, 64, x)
        
        x = nn.MaxPool2d(kernel_size=2, stride=2)(x)
        
        x = self.inception_block(480, 192, 96, 208, 16, 48, 64, x)
        x = self.inception_block(512, 160, 112, 224, 24, 64, 64, x)
        x = self.inception_block(512, 128, 128, 256, 24, 64, 64, x)
        x = self.inception_block(512, 112, 144, 288, 32, 64, 64, x)
        x = self.inception_block(528, 256, 160, 320, 32, 128, 128, x)
        x = nn.MaxPool2d(kernel_size=3, stride=2)(x)
        x = self.inception_block(832, 256, 160, 320, 32, 128, 128, x)
        x = self.inception_block(832, 384, 192, 384, 48, 128, 128, x)
        
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)
        
        super_out = self.fc3a(x)
        sub_out = self.fc3b(x)
        return super_out, sub_out

class Trainer():
    def __init__(self, model, criterion, optimizer, train_loader, val_loader, test_loader=None, device='cuda'):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.device = device

    def train_epoch(self):
        running_loss = 0.0
        device = self.device
        for i, data in enumerate(self.train_loader):
            inputs, super_labels, sub_labels = data[0].to(device), data[1].to(device), data[3].to(device)

            self.optimizer.zero_grad()
            super_outputs, sub_outputs = self.model(inputs)
            loss = self.criterion(super_outputs, super_labels) + self.criterion(sub_outputs, sub_labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f'Training loss: {running_loss/i:.3f}')

    def validate_epoch(self):
        super_correct = 0
        sub_correct = 0
        total = 0
        running_loss = 0.0
        device = self.device
        with torch.no_grad():
            for i, data in enumerate(self.val_loader):
                inputs, super_labels, sub_labels = data[0].to(device), data[1].to(device), data[3].to(device)

                super_outputs, sub_outputs = self.model(inputs)
                loss = self.criterion(super_outputs, super_labels) + self.criterion(sub_outputs, sub_labels)
                _, super_predicted = torch.max(super_outputs.data, 1)
                _, sub_predicted = torch.max(sub_outputs.data, 1)

                total += super_labels.size(0)
                super_correct += (super_predicted == super_labels).sum().item()
                sub_correct += (sub_predicted == sub_labels).sum().item()
                running_loss += loss.item()

        print(f'Validation loss: {running_loss/i:.3f}')
        print(f'Validation superclass acc: {100 * super_correct / total:.2f} %')
        print(f'Validation subclass acc: {100 * sub_correct / total:.2f} %')

    def test(self, save_to_csv=False, return_predictions=False):
        if not self.test_loader:
            raise NotImplementedError('test_loader not specified')

        # Evaluate on test set, in this simple demo no special care is taken for novel/unseen classes
        test_predictions = {'image': [], 'superclass_index': [], 'subclass_index': []}
        with torch.no_grad():
            for i, data in enumerate(self.test_loader):
                inputs, img_name = data[0].to(device), data[1]

                super_outputs, sub_outputs = self.model(inputs)
                _, super_predicted = torch.max(super_outputs.data, 1)
                _, sub_predicted = torch.max(sub_outputs.data, 1)

                test_predictions['image'].append(img_name[0])
                test_predictions['superclass_index'].append(super_predicted.item())
                test_predictions['subclass_index'].append(sub_predicted.item())

        test_predictions = pd.DataFrame(data=test_predictions)

        if save_to_csv:
            test_predictions.to_csv('example_test_predictions.csv', index=False)

        if return_predictions:
            return test_predictions

In [157]:
google_net = torchvision.models.inception_v3(pretrained=True)

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /home/raagbhargava/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|████████████████████████████████████████| 104M/104M [00:02<00:00, 42.7MB/s]


In [155]:
# Init model and trainer
device = 'cuda'
model = GoogLeNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-2)
trainer = Trainer(model, criterion, optimizer, train_loader, val_loader, test_loader)

In [156]:
# Training loop
for epoch in range(20):
    print(f'Epoch {epoch+1}')
    trainer.train_epoch()
    trainer.validate_epoch()
    print('')

print('Finished Training')

Epoch 1


RuntimeError: Given groups=1, weight of size [256, 528, 1, 1], expected input[16, 512, 15, 15] to have 528 channels, but got 512 channels instead

In [None]:
trainer.test(save_to_csv=True, return_predictions=True)

'''
This simple baseline scores the following test accuracy

Superclass Accuracy
Overall: 43.83 %
Seen: 61.11 %
Unseen: 0.00 %

Subclass Accuracy
Overall: 2.03 %
Seen: 9.56 %
Unseen: 0.00 %
'''

In [None]:
# Read the CSV file into a DataFrame
file_path = 'example_test_predictions.csv'  # Replace with the actual file path
df = pd.read_csv(file_path)

# Display the original DataFrame
print("Original DataFrame:")
print(df)

# Select two columns
selected_columns = df[['image', 'subclass_index']]

# Rename one of the columns
selected_columns = selected_columns.rename(columns={'image': 'ID'})
selected_columns = selected_columns.rename(columns={'subclass_index': 'Target'})

# Save the modified DataFrame to a new CSV file
output_file_path = 'sub_test.csv'  # Replace with the desired output file path
selected_columns.to_csv(output_file_path, index=False)

# Select two columns
selected_column = df[['image', 'superclass_index']]

# Rename one of the columns
selected_column = selected_column.rename(columns={'image': 'ID'})
selected_column = selected_column.rename(columns={'superclass_index': 'Target'})
print(selected_columns)

# Save the modified DataFrame to a new CSV file
output_file_path = 'super_test.csv'  # Replace with the desired output file path
selected_column.to_csv(output_file_path, index=False)

In [5]:
def Inception_block(input_layer, f1, f2_conv1, f2_conv3, f3_conv1, f3_conv5, f4):
    # Input:
    # - f1: number of filters of the 1x1 convolutional layer in the first path
    # - f2_conv1, f2_conv3 are number of filters corresponding to the 1x1 and 3x3 convolutional layers in the second path
    # - f3_conv1, f3_conv5 are the number of filters corresponding to the 1x1 and 5x5  convolutional layer in the third path
    # - f4: number of filters of the 1x1 convolutional layer in the fourth path

    # 1st path:
    path1 = Conv2D(filters=f1, kernel_size = (1,1), padding = 'same', activation = 'relu')(input_layer)

    # 2nd path
    path2 = Conv2D(filters = f2_conv1, kernel_size = (1,1), padding = 'same', activation = 'relu')(input_layer)
    path2 = Conv2D(filters = f2_conv3, kernel_size = (3,3), padding = 'same', activation = 'relu')(path2)

    # 3rd path
    path3 = Conv2D(filters = f3_conv1, kernel_size = (1,1), padding = 'same', activation = 'relu')(input_layer)
    path3 = Conv2D(filters = f3_conv5, kernel_size = (5,5), padding = 'same', activation = 'relu')(path3)

    # 4th path
    path4 = MaxPooling2D((3,3), strides= (1,1), padding = 'same')(input_layer)
    path4 = Conv2D(filters = f4, kernel_size = (1,1), padding = 'same', activation = 'relu')(path4)

    output_layer = concatenate([path1, path2, path3, path4], axis = -1)

    return output_layer

In [6]:
def GoogLeNet():
    # input layer
    input_layer = Input(shape = (32, 32, 3))

    # Adjusted initial layers
    X = Conv2D(filters = 64, kernel_size = (3,3), strides = 1, padding = 'same', activation = 'relu')(input_layer)
    X = MaxPooling2D(pool_size = (2,2), strides = 1)(X)

    # Convolutional layers
    X = Conv2D(filters = 64, kernel_size = (1,1), strides = 1, padding = 'same', activation = 'relu')(X)
    X = Conv2D(filters = 192, kernel_size = (3,3), padding = 'same', activation = 'relu')(X)
    X = MaxPooling2D(pool_size= (2,2), strides = 1)(X)
    # 1st Inception block
    X = Inception_block(X, f1 = 64, f2_conv1 = 96, f2_conv3 = 128, f3_conv1 = 16, f3_conv5 = 32, f4 = 32)

    # 2nd Inception block
    X = Inception_block(X, f1 = 128, f2_conv1 = 128, f2_conv3 = 192, f3_conv1 = 32, f3_conv5 = 96, f4 = 64)

    # max-pooling layer: pool_size = (3,3), strides = 2
    X = MaxPooling2D(pool_size= (2,2), strides = 2)(X)

    # 3rd Inception block
    X = Inception_block(X, f1 = 192, f2_conv1 = 96, f2_conv3 = 208, f3_conv1 = 16, f3_conv5 = 48, f4 = 64)

    # Extra network 1:
    # X1 = AveragePooling2D(pool_size = (5,5), strides = 3)(X)
    # X1 = Conv2D(filters = 128, kernel_size = (1,1), padding = 'same', activation = 'relu')(X1)
    # X1 = Flatten()(X1)
    # X1 = Dense(1024, activation = 'relu')(X1)
    # X1 = Dropout(0.7)(X1)
    # X1 = Dense(5, activation = 'softmax')(X1)


    # 4th Inception block
    X = Inception_block(X, f1 = 160, f2_conv1 = 112, f2_conv3 = 224, f3_conv1 = 24, f3_conv5 = 64, f4 = 64)

    # 5th Inception block
    X = Inception_block(X, f1 = 128, f2_conv1 = 128, f2_conv3 = 256, f3_conv1 = 24, f3_conv5 = 64, f4 = 64)

    # 6th Inception block
    X = Inception_block(X, f1 = 112, f2_conv1 = 144, f2_conv3 = 288, f3_conv1 = 32, f3_conv5 = 64, f4 = 64)

    # Extra network 2:
    # X2 = AveragePooling2D(pool_size = (5,5), strides = 3)(X)
    # X2 = Conv2D(filters = 128, kernel_size = (1,1), padding = 'same', activation = 'relu')(X2)
    # X2 = Flatten()(X2)
    # X2 = Dense(1024, activation = 'relu')(X2)
    # X2 = Dropout(0.7)(X2)
    # X2 = Dense(1000, activation = 'softmax')(X2)


    # 7th Inception block
    X = Inception_block(X, f1 = 256, f2_conv1 = 160, f2_conv3 = 320, f3_conv1 = 32,
                      f3_conv5 = 128, f4 = 128)

    # max-pooling layer: pool_size = (3,3), strides = 2
    X = MaxPooling2D(pool_size = (3,3), strides = 2)(X)

    # 8th Inception block
    X = Inception_block(X, f1 = 256, f2_conv1 = 160, f2_conv3 = 320, f3_conv1 = 32, f3_conv5 = 128, f4 = 128)

    # 9th Inception block
    X = Inception_block(X, f1 = 384, f2_conv1 = 192, f2_conv3 = 384, f3_conv1 = 48, f3_conv5 = 128, f4 = 128)

    # Global Average pooling layer
    X = GlobalAveragePooling2D(name = 'GAPL')(X)

    # Dropoutlayer
    X = Dropout(0.4)(X)

    # output layer
    X = Dense(3, activation = 'softmax')(X)

    # model
    model = Model(input_layer, X, name = 'GoogLeNet')

    return model

In [10]:
# Assuming 'superclass_index' column contains the class indices
num_classes = train_ann_df['superclass_index'].nunique()

def keras_generator(data_loader):
    while True:
        for images, super_idxs, _, _, _ in data_loader:
            # Transpose the images from (C, H, W) to (H, W, C) and convert to numpy array
            X = np.array([image.permute(1, 2, 0).numpy() for image in images])

            # Ensure the data type is float32, as expected by TensorFlow/Keras
            X = X.astype('float32')

            y = to_categorical(super_idxs, num_classes=num_classes)
            yield X, y


# Create the generator for training and validation
train_gen = keras_generator(train_loader)
val_gen = keras_generator(val_loader)

# Train the model
model = GoogLeNet()
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Determine steps per epoch (total samples / batch size)
steps_per_epoch = len(train_dataset) // batch_size
validation_steps = len(val_dataset) // batch_size

history = model.fit(train_gen,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=val_gen,
                    validation_steps=validation_steps,
                    epochs=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

KeyboardInterrupt: 

In [17]:
# Step 1: Load pre-trained GoogLeNet
model = models.googlenet(pretrained=True)

# Step 2: Modify final layers
# Replace the final fully connected layers for your specific classification task
# For example, if you have 10 classes:
num_classes = 88
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Step 3: Freeze layers
# Suppose you want to freeze the convolutional layers
for param in model.parameters():
    param.requires_grad = False

# Unfreeze the final classification layer
for param in model.fc.parameters():
    param.requires_grad = True

# Step 4: Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Step 5: Training loop
# Iterate over your dataset and update the model parameters
# Only the parameters of the unfrozen layers will be updated
for epoch in range(20):
    for i, data in enumerate(train_loader):  # Adjust accordingly
        optimizer.zero_grad()
        outputs = model(data[0])
        loss = criterion(data[0], data[3])
        loss.backward()
        optimizer.step()

            
# Save the model if needed
torch.save(model.state_dict(), 'fine_tuned_googlenet.pth')

RuntimeError: only batches of spatial targets supported (3D tensors) but got targets of dimension: 1

In [None]:
# Training loop
for epoch in range(20):
    print(f'Epoch {epoch+1}')
    trainer.train_epoch()
    trainer.validate_epoch()
    print('')

print('Finished Training')