In [1]:
import pandas as pd
import os
import numpy as np
import scipy
import torch
import torch.nn as nn
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader, Dataset
from PIL import Image

In [2]:
# Remember the working directory; later cells build the dataset paths from it.
current_directory = os.getcwd()
print(current_directory)

/home/r11kaijun/sc4001/Oxford-Flowers


In [3]:
# Collect the test-split image ids and derive the filename of each image.
set_dict = scipy.io.loadmat(current_directory + "/setid.mat")

test_ids = set_dict["tstid"][0]
label_idx_arr = list(test_ids)
# A filename is "image_" + the id left-padded with zeros to five digits + ".jpg".
filename_arr = [
    "image_" + str(image_number).zfill(5) + ".jpg" for image_number in test_ids
]

In [7]:
# Inspect the raw contents loaded from setid.mat (the trnid / valid / tstid id arrays).
set_dict

{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNX86, Created on: Thu Feb 19 17:38:58 2009',
 '__version__': '1.0',
 '__globals__': [],
 'trnid': array([[6765, 6755, 6768, ..., 8026, 8036, 8041]], dtype=uint16),
 'valid': array([[6773, 6767, 6739, ..., 8028, 8008, 8030]], dtype=uint16),
 'tstid': array([[6734, 6735, 6737, ..., 8044, 8045, 8047]], dtype=uint16)}

In [4]:
# One row per selected image: its numeric id and the matching JPEG filename.
file_df = pd.DataFrame(dict(label_idx=label_idx_arr, filename=filename_arr))
file_df

Unnamed: 0,label_idx,filename
0,6734,image_06734.jpg
1,6735,image_06735.jpg
2,6737,image_06737.jpg
3,6742,image_06742.jpg
4,6743,image_06743.jpg
...,...,...
6144,8040,image_08040.jpg
6145,8043,image_08043.jpg
6146,8044,image_08044.jpg
6147,8045,image_08045.jpg


In [5]:
# Load the per-image class labels and pair each one with its 1-based image id.
mat_dict = scipy.io.loadmat(os.path.join(current_directory, "imagelabels.mat"))
image_labels = mat_dict["labels"][0]
label_df = pd.DataFrame(
    data={
        # Size the id range from the data itself; a hard-coded upper bound makes
        # the DataFrame constructor fail as soon as the label count differs.
        "label_idx": np.arange(1, len(image_labels) + 1),
        "labels": image_labels,
    },
)

label_df

Unnamed: 0,label_idx,labels
0,1,77
1,2,77
2,3,77
3,4,77
4,5,77
...,...,...
8184,8185,62
8185,8186,62
8186,8187,62
8187,8188,62


In [6]:
# Join the label table with the filename table on their shared image id.
# https://www.geeksforgeeks.org/merge-two-pandas-dataframes-on-certain-columns/
df_merged = pd.merge(label_df, file_df, on="label_idx")
df_merged

Unnamed: 0,label_idx,labels,filename
0,1,77,image_00001.jpg
1,2,77,image_00002.jpg
2,3,77,image_00003.jpg
3,4,77,image_00004.jpg
4,5,77,image_00005.jpg
...,...,...,...
6144,8183,62,image_08183.jpg
6145,8184,62,image_08184.jpg
6146,8186,62,image_08186.jpg
6147,8188,62,image_08188.jpg


In [7]:
def get_file_label_mapping(base_dir, split="test"):
    """Build a table mapping image ids of one dataset split to labels and filenames.

    Reads ``setid.mat`` (split membership) and ``imagelabels.mat`` (one label
    per image, in image-id order) from ``base_dir``.

    Args:
        base_dir: directory containing ``setid.mat`` and ``imagelabels.mat``.
        split: which split to return: ``"train"``, ``"val"``/``"valid"`` or
            ``"test"`` (default, matching the original behaviour).

    Returns:
        pandas.DataFrame with columns ``label_idx`` (1-based image id),
        ``labels`` and ``filename``, one row per image in the split, ordered
        by image id.

    Raises:
        ValueError: if ``split`` is not one of the supported names.
    """
    split_keys = {"train": "trnid", "val": "valid", "valid": "valid", "test": "tstid"}
    if split not in split_keys:
        raise ValueError(
            f"unknown split {split!r}; expected one of {sorted(split_keys)}"
        )

    set_dict = scipy.io.loadmat(os.path.join(base_dir, "setid.mat"))
    # loadmat returns 2-D arrays; flatten to a plain vector of image ids.
    image_ids = set_dict[split_keys[split]].ravel()

    file_df = pd.DataFrame(
        data={
            "label_idx": image_ids,
            # Filenames carry the id zero-padded to five digits.
            "filename": [f"image_{int(image_id):05d}.jpg" for image_id in image_ids],
        }
    )

    mat_dict = scipy.io.loadmat(os.path.join(base_dir, "imagelabels.mat"))
    image_labels = mat_dict["labels"].ravel()
    label_df = pd.DataFrame(
        data={
            # Image ids are 1-based; derive the count from the label array
            # instead of hard-coding the dataset size.
            "label_idx": np.arange(1, len(image_labels) + 1),
            "labels": image_labels,
        },
    )

    # Inner join keeps only the images that belong to the requested split.
    file_label_df = label_df.merge(file_df, on="label_idx")
    return file_label_df


# Build the id/label/filename table from the .mat files in the working directory.
file_label_df = get_file_label_mapping(current_directory)
file_label_df

Unnamed: 0,label_idx,labels,filename
0,1,77,image_00001.jpg
1,2,77,image_00002.jpg
2,3,77,image_00003.jpg
3,4,77,image_00004.jpg
4,5,77,image_00005.jpg
...,...,...,...
6144,8183,62,image_08183.jpg
6145,8184,62,image_08184.jpg
6146,8186,62,image_08186.jpg
6147,8188,62,image_08188.jpg


In [8]:
class TestImageDataset(Dataset):
    r"""
    Dataset that loads images from a directory and pairs them with labels.

    Args:
        img_dir: directory that contains the image files
        img_filenames: an indexable sequence of image filenames (list, array
            or pandas Series); entry ``i`` belongs with ``labels`` entry ``i``
        labels: an indexable sequence of labels
        transform: transformations to the image
        target_transform: transformations to the label
    """

    def __init__(
        self, img_dir, img_filenames, labels, transform=None, target_transform=None
    ):
        self.img_dir = img_dir
        self.img_filenames = img_filenames
        self.img_labels = labels
        self.transform = transform
        self.target_transform = target_transform

    @staticmethod
    def _item_at(seq, idx):
        """Return the ``idx``-th element of ``seq`` by position."""
        # pandas objects resolve ``seq[idx]`` by index *label*, which breaks for
        # filtered or shuffled frames; ``.iloc`` is always positional.
        positional = getattr(seq, "iloc", None)
        return seq[idx] if positional is None else positional[idx]

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        filename = self._item_at(self.img_filenames, idx)
        label = self._item_at(self.img_labels, idx)

        # open the image; convert() loads the pixel data, so the file can be
        # closed right away, and guarantees a 3-channel image for the transform
        path = os.path.join(self.img_dir, filename)
        with Image.open(path) as raw_img:
            img = raw_img.convert("RGB")

        # transform the image
        if self.transform is not None:
            img = self.transform(img)

        # transform the labels
        if self.target_transform is not None:
            label = self.target_transform(label)

        return img, label

In [20]:
# Use the preprocessing pipeline that ships with the pretrained VGG16 weights.
weights = models.VGG16_Weights.IMAGENET1K_V1
preprocess = weights.transforms()
preprocess

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

In [21]:
# Wire the merged label/filename table into a dataset that applies the VGG16 preprocessing.
img_dir = current_directory + "/flowers-102/jpg/"
img_filenames = df_merged["filename"]
labels = df_merged["labels"]

img_datatset = TestImageDataset(
    img_dir=img_dir,
    img_filenames=img_filenames,
    labels=labels,
    transform=preprocess,
)

In [26]:
# Smoke test: fetch the first sample and show the transformed image and its label.
img, label = img_datatset[0]
print("img", img, "\nlabel", label)

img tensor([[[-0.4568, -0.4739, -0.4739,  ...,  0.1254,  0.1426,  0.1083],
         [-0.4397, -0.4568, -0.4568,  ...,  0.2282,  0.2453,  0.2624],
         [-0.3883, -0.4226, -0.4397,  ...,  0.3652,  0.3481,  0.3309],
         ...,
         [-1.2959, -1.2959, -1.3130,  ..., -1.3987, -1.4158, -1.4329],
         [-1.3815, -1.3815, -1.3644,  ..., -1.3302, -1.2959, -1.3130],
         [-1.4329, -1.4329, -1.4500,  ..., -1.1075, -1.0733, -1.1247]],

        [[ 0.4853,  0.5028,  0.5903,  ...,  0.5203,  0.5028,  0.4328],
         [ 0.5378,  0.5378,  0.5903,  ...,  0.6954,  0.6604,  0.6078],
         [ 0.5903,  0.5903,  0.5903,  ...,  0.8529,  0.8179,  0.7654],
         ...,
         [-1.0028, -1.0028, -1.0028,  ..., -1.1954, -1.2129, -1.2654],
         [-1.0903, -1.0903, -1.1253,  ..., -1.0553, -1.0553, -1.1604],
         [-1.1779, -1.1779, -1.2129,  ..., -0.8277, -0.8452, -0.9678]],

        [[-1.4733, -1.5081, -1.5953,  ..., -0.3404, -0.3404, -0.3927],
         [-1.4559, -1.5430, -1.5779,  ...

In [None]:
# class VGG16FineTune(nn.Module):
#     def __init__(self):
#         super(VGG16FineTune, self).__init__()

#         # get the base model
#         # initiate new instance, so that they don't interfere with each other
#         weights = models.VGG16_Weights.IMAGENET1K_V1
#         base = models.vgg16(weights=weights)

#         # freeze all of the previous layers
#         for param in base.parameters():
#             param.requires_grad = False

#         # VGG16's original feature extractor remains unchanged
#         self.features = base.features

#         last_input_feature = base.classifier[6].in_features  # Input to the last layer
#         self.classifier = nn.Sequential(
#             *list(base.classifier[:-1]),  # Retain all layers except the last one
#             nn.Linear(last_input_feature, 102)  # Output layer for 102 classes
#         )

#     def forward(self, X):
#         X = self.features(X)
#         # flatten the feature maps into single vectors for fully connected layers
#         X = torch.flatten(X, 1)
#         y = self.classifier(X)
#         return y

In [None]:
class VGG16FineTune(nn.Module):
    """VGG16 with a frozen pretrained backbone and a new, trainable 102-class head.

    Args:
        no_features: width of the hidden fully connected layer in the new
            classifier head.
    """

    def __init__(self, no_features):
        super(VGG16FineTune, self).__init__()

        # get the base model
        # initiate new instance, so that they don't interfere with each other
        weights = models.VGG16_Weights.IMAGENET1K_V1
        base = models.vgg16(weights=weights)

        # freeze all of the previous layers
        for param in base.parameters():
            param.requires_grad = False

        # Keep the frozen convolutional stack: forward() runs the input through
        # it, so it must be registered on this module.
        self.features = base.features
        # VGG16's pooling stage between the conv stack and the classifier; it has
        # no parameters and keeps the flattened size at the 25088 expected below.
        self.avgpool = base.avgpool

        # Swap out the classification module (newly created, hence trainable)
        self.classifier = nn.Sequential(
            nn.Linear(25088, no_features),  # VGG16's original structure
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(no_features, 102),  # Change out_features to match 102 classes
        )

    def forward(self, X):
        """Return the class logits, shape (batch, 102), for a batch of images ``X``."""
        X = self.features(X)
        X = self.avgpool(X)
        # flatten the feature maps into single vectors for fully connected layers
        X = torch.flatten(X, 1)
        y = self.classifier(X)
        return y

In [8]:
from utils import get_file_label_mapping

In [12]:
# Uses the two-argument helper imported from utils in the previous cell, not the
# single-argument function defined earlier in this notebook.
# NOTE(review): the printed "key trnid" suggests "train" selects the trnid split - confirm in utils.
file_label_df = get_file_label_mapping(current_directory, "train")
file_label_df

key trnid


Unnamed: 0,label_idx,labels,filename
0,28,77,image_00028.jpg
1,36,77,image_00036.jpg
2,79,77,image_00079.jpg
3,116,77,image_00116.jpg
4,118,77,image_00118.jpg
...,...,...,...
1015,8165,62,image_08165.jpg
1016,8166,62,image_08166.jpg
1017,8167,62,image_08167.jpg
1018,8175,62,image_08175.jpg
