## Dataloader for CUB Birds, FGVC Aircraft, and FoodX dataset

In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torchvision.datasets import VisionDataset
from torchvision.datasets.folder import default_loader
import matplotlib.pyplot as plt
import time
import os
import copy

## 1 - Dataloader for CUB Dataset

In [2]:
class CUBDataset(torchvision.datasets.ImageFolder):
    """
    Dataset class for CUB Dataset
    """

    def __init__(self, image_root_path, caption_root_path=None, split="train", *args, **kwargs):
        """
        Args:
            image_root_path:      path to dir containing images and lists folders
            caption_root_path:    path to dir containing captions
            split:          train / test
            *args:
            **kwargs:
        """
        image_info = self.get_file_content(f"{image_root_path}/images.txt")
        self.image_id_to_name = {y[0]: y[1] for y in [x.strip().split(" ") for x in image_info]}
        split_info = self.get_file_content(f"{image_root_path}/train_test_split.txt")
        self.split_info = {self.image_id_to_name[y[0]]: y[1] for y in [x.strip().split(" ") for x in split_info]}
        self.split = "1" if split == "train" else "0"
        self.caption_root_path = caption_root_path

        super(CUBDataset, self).__init__(root=f"{image_root_path}/images", is_valid_file=self.is_valid_file,
                                         *args, **kwargs)

    def is_valid_file(self, x):
        return self.split_info[(x[len(self.root) + 1:])] == self.split

    @staticmethod
    def get_file_content(file_path):
        with open(file_path) as fo:
            content = fo.readlines()
        return content


In [3]:
data_root = "/apps/local/shared/CV703/datasets/CUB/CUB_200_2011"

mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

# write data transform here as per the requirement
data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])

train_dataset_cub = CUBDataset(image_root_path=f"{data_root}", transform=data_transform, split="train")
test_dataset_cub = CUBDataset(image_root_path=f"{data_root}", transform=data_transform, split="test")


# load in into the torch dataloader to get variable batch size, shuffle 
train_loader_cub = torch.utils.data.DataLoader(train_dataset_cub, batch_size=32, drop_last=True, shuffle=True)
test_loader_cub = torch.utils.data.DataLoader(test_dataset_cub, batch_size=32, drop_last=False, shuffle=False)

### Test the dataloader

In [4]:
len(train_dataset_cub), len(test_dataset_cub)

(5994, 5794)

In [5]:
len(train_loader_cub), len(test_loader_cub)

(187, 182)

In [6]:
for i, (inputs, labels) in enumerate(train_loader_cub):
    print(inputs.shape)
    print(labels)
    print('='*50)
    break

torch.Size([32, 3, 224, 224])
tensor([186, 116,  22, 113, 170, 188,  27,   9, 186, 149, 111,  23,   4,  76,
        165,  65, 194,  78, 198, 112,  60, 166,  63,  49, 190,  37,  24, 139,
         70, 168,  94, 119])


## 2 - Dataloader for FGVC Aircraft Dataset

In [7]:
class FGVCAircraft(VisionDataset):
    """
    FGVC-Aircraft <http://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/>`_ Dataset.

    Args:
        root (string): Root directory of the dataset.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from test set.
        class_type (string, optional): choose from ('variant', 'family', 'manufacturer').
        transform (callable, optional): A function/transform that  takes in an PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
    """
    
    class_types = ('variant', 'family', 'manufacturer')
    splits = ('train', 'val', 'trainval', 'test')
    img_folder = os.path.join('data', 'images')

    def __init__(self, root, train=True, class_type='variant', transform=None,
                 target_transform=None):
        super(FGVCAircraft, self).__init__(root, transform=transform, target_transform=target_transform)
        split = 'trainval' if train else 'test'
        if split not in self.splits:
            raise ValueError('Split "{}" not found. Valid splits are: {}'.format(
                split, ', '.join(self.splits),
            ))
        if class_type not in self.class_types:
            raise ValueError('Class type "{}" not found. Valid class types are: {}'.format(
                class_type, ', '.join(self.class_types),
            ))

        self.class_type = class_type
        self.split = split
        self.classes_file = os.path.join(self.root, 'data',
                                         'images_%s_%s.txt' % (self.class_type, self.split))

        (image_ids, targets, classes, class_to_idx) = self.find_classes()
        samples = self.make_dataset(image_ids, targets)

        self.loader = default_loader

        self.samples = samples
        self.classes = classes
        self.class_to_idx = class_to_idx

    def __getitem__(self, index):
        path, target = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return sample, target

    def __len__(self):
        return len(self.samples)

    def find_classes(self):
        # read classes file, separating out image IDs and class names
        image_ids = []
        targets = []
        with open(self.classes_file, 'r') as f:
            for line in f:
                split_line = line.split(' ')
                image_ids.append(split_line[0])
                targets.append(' '.join(split_line[1:]))

        # index class names
        classes = np.unique(targets)
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        targets = [class_to_idx[c] for c in targets]
        
        # Modify class index as we are going to concat to CUB dataset
        num_cub_classes = len(train_dataset_cub.class_to_idx)
        targets = [t + num_cub_classes for t in targets]

        return image_ids, targets, classes, class_to_idx

    def make_dataset(self, image_ids, targets):
        assert (len(image_ids) == len(targets))
        images = []
        for i in range(len(image_ids)):
            item = (os.path.join(self.root, self.img_folder,
                                 '%s.jpg' % image_ids[i]), targets[i])
            images.append(item)
        return images

In [8]:
data_root = "/apps/local/shared/CV703/datasets/fgvc-aircraft-2013b"

mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

# write data transform here as per the requirement
data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])

train_dataset_aircraft = FGVCAircraft(root=f"{data_root}", transform=data_transform, train=True)
test_dataset_aircraft = FGVCAircraft(root=f"{data_root}", transform=data_transform, train=False)


# load in into the torch dataloader to get variable batch size, shuffle 
train_loader_aircraft = torch.utils.data.DataLoader(train_dataset_aircraft, batch_size=32, drop_last=True, shuffle=True)
test_loader_aircraft = torch.utils.data.DataLoader(test_dataset_aircraft, batch_size=32, drop_last=False, shuffle=False)

### Test the dataloader

In [9]:
len(train_dataset_aircraft), len(test_dataset_aircraft)

(6667, 3333)

In [10]:
len(train_loader_aircraft), len(test_loader_aircraft)

(208, 105)

In [11]:
for i, (inputs, labels) in enumerate(train_loader_aircraft):
    print(inputs.shape)
    print(labels)
    print('='*50)
    break

torch.Size([32, 3, 224, 224])
tensor([235, 241, 215, 245, 252, 244, 226, 239, 222, 247, 227, 226, 234, 266,
        257, 219, 294, 232, 200, 297, 236, 230, 278, 210, 279, 226, 247, 235,
        210, 231, 204, 258])


## Concatenate CUB Birds and FGVC Aircraft Datasets

In [12]:
from torch.utils.data import ConcatDataset 

In [13]:
concat_dataset_train = ConcatDataset([train_dataset_cub, train_dataset_aircraft])
concat_dataset_test = ConcatDataset([test_dataset_cub, test_dataset_aircraft])

concat_loader_train = torch.utils.data.DataLoader(
             concat_dataset_train,
             batch_size=128, shuffle=True,
             num_workers=1, pin_memory=True
            )
concat_loader_test = torch.utils.data.DataLoader(
             concat_dataset_test,
             batch_size=128, shuffle=False,
             num_workers=1, pin_memory=True
            )

In [14]:
len(concat_dataset_train), len(concat_dataset_test)

(12661, 9127)

In [15]:
for i, (inputs, labels) in enumerate(concat_loader_train):
    print(inputs.shape)
    print(labels)
    print('='*50)
    break

torch.Size([128, 3, 224, 224])
tensor([243, 283,  35, 265, 247, 218, 210, 200, 289, 209, 201, 133, 255, 188,
        138, 242, 139,   1, 136, 267, 146, 226,  25, 244, 240, 224, 250, 275,
        264, 216, 299, 265, 213, 276,  53, 254, 182, 128, 252,  57,  75,  52,
        161,  82, 206, 277, 215, 272, 101, 236,  39, 220, 244, 220, 183, 157,
        253, 173, 137, 216, 249, 132, 261, 157, 177,  62, 276, 239, 258, 147,
        206, 101, 157, 212, 246, 257, 274, 211, 221, 169,  82, 238, 179,  31,
        242, 126, 228, 261, 280, 105,  54, 237,  59,  23, 141,  50,  40, 272,
        255, 239, 109, 219, 127, 264,  59, 187, 104, 258, 296, 151, 278, 119,
        166,  81, 214,  68, 128,  28, 251, 294, 205, 288, 223, 267, 298,  60,
        269, 249])


### Way to get information about class names --> Labels

In [16]:
concat_dataset_train.datasets

[Dataset CUBDataset
     Number of datapoints: 5994
     Root location: /apps/local/shared/CV703/datasets/CUB/CUB_200_2011/images
     StandardTransform
 Transform: Compose(
                Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn)
                ToTensor()
                Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
            ),
 Dataset FGVCAircraft
     Number of datapoints: 6667
     Root location: /apps/local/shared/CV703/datasets/fgvc-aircraft-2013b
     StandardTransform
 Transform: Compose(
                Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn)
                ToTensor()
                Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
            )]

In [17]:
concat_dataset_train.datasets[0].class_to_idx

{'001.Black_footed_Albatross': 0,
 '002.Laysan_Albatross': 1,
 '003.Sooty_Albatross': 2,
 '004.Groove_billed_Ani': 3,
 '005.Crested_Auklet': 4,
 '006.Least_Auklet': 5,
 '007.Parakeet_Auklet': 6,
 '008.Rhinoceros_Auklet': 7,
 '009.Brewer_Blackbird': 8,
 '010.Red_winged_Blackbird': 9,
 '011.Rusty_Blackbird': 10,
 '012.Yellow_headed_Blackbird': 11,
 '013.Bobolink': 12,
 '014.Indigo_Bunting': 13,
 '015.Lazuli_Bunting': 14,
 '016.Painted_Bunting': 15,
 '017.Cardinal': 16,
 '018.Spotted_Catbird': 17,
 '019.Gray_Catbird': 18,
 '020.Yellow_breasted_Chat': 19,
 '021.Eastern_Towhee': 20,
 '022.Chuck_will_Widow': 21,
 '023.Brandt_Cormorant': 22,
 '024.Red_faced_Cormorant': 23,
 '025.Pelagic_Cormorant': 24,
 '026.Bronzed_Cowbird': 25,
 '027.Shiny_Cowbird': 26,
 '028.Brown_Creeper': 27,
 '029.American_Crow': 28,
 '030.Fish_Crow': 29,
 '031.Black_billed_Cuckoo': 30,
 '032.Mangrove_Cuckoo': 31,
 '033.Yellow_billed_Cuckoo': 32,
 '034.Gray_crowned_Rosy_Finch': 33,
 '035.Purple_Finch': 34,
 '036.Norther

In [18]:
concat_dataset_train.datasets[1].class_to_idx

{'707-320\n': 0,
 '727-200\n': 1,
 '737-200\n': 2,
 '737-300\n': 3,
 '737-400\n': 4,
 '737-500\n': 5,
 '737-600\n': 6,
 '737-700\n': 7,
 '737-800\n': 8,
 '737-900\n': 9,
 '747-100\n': 10,
 '747-200\n': 11,
 '747-300\n': 12,
 '747-400\n': 13,
 '757-200\n': 14,
 '757-300\n': 15,
 '767-200\n': 16,
 '767-300\n': 17,
 '767-400\n': 18,
 '777-200\n': 19,
 '777-300\n': 20,
 'A300B4\n': 21,
 'A310\n': 22,
 'A318\n': 23,
 'A319\n': 24,
 'A320\n': 25,
 'A321\n': 26,
 'A330-200\n': 27,
 'A330-300\n': 28,
 'A340-200\n': 29,
 'A340-300\n': 30,
 'A340-500\n': 31,
 'A340-600\n': 32,
 'A380\n': 33,
 'ATR-42\n': 34,
 'ATR-72\n': 35,
 'An-12\n': 36,
 'BAE 146-200\n': 37,
 'BAE 146-300\n': 38,
 'BAE-125\n': 39,
 'Beechcraft 1900\n': 40,
 'Boeing 717\n': 41,
 'C-130\n': 42,
 'C-47\n': 43,
 'CRJ-200\n': 44,
 'CRJ-700\n': 45,
 'CRJ-900\n': 46,
 'Cessna 172\n': 47,
 'Cessna 208\n': 48,
 'Cessna 525\n': 49,
 'Cessna 560\n': 50,
 'Challenger 600\n': 51,
 'DC-10\n': 52,
 'DC-3\n': 53,
 'DC-6\n': 54,
 'DC-8\n': 5

## 3. Dataloader for Food Dataset

In [21]:
import os
import torch
import torchvision
from torchvision import datasets, models, transforms
from PIL import Image
import pandas as pd

In [22]:
data_dir = "/apps/local/shared/CV703/datasets/FoodX/food_dataset"

split = 'train'
train_df = pd.read_csv(f'{data_dir}/annot/{split}_info.csv', names= ['image_name','label'])
train_df['path'] = train_df['image_name'].map(lambda x: os.path.join(f'{data_dir}/{split}_set/', x))


split = 'val'
val_df = pd.read_csv(f'{data_dir}/annot/{split}_info.csv', names= ['image_name','label'])
val_df['path'] = val_df['image_name'].map(lambda x: os.path.join(f'{data_dir}/{split}_set/', x))

In [23]:
data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((224, 224))
        
    ])

In [24]:
class FOODDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        row = self.dataframe.iloc[index]
        return (
            data_transform(Image.open(row["path"])), row['label']
        )

In [25]:
train_dataset = FOODDataset(train_df)
val_dataset = FOODDataset(val_df)

# load in into the torch dataloader to get variable batch size, shuffle 
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, drop_last=True, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, drop_last=False, shuffle=True)

In [26]:
len(train_dataset), len(val_dataset)

(118475, 11994)

In [27]:
len(train_loader), len(val_loader)

(3702, 375)

In [28]:
for i, (inputs, labels) in enumerate(val_loader):
    print(inputs.shape)
    print(labels)
    print('='*50)
    break

torch.Size([32, 3, 224, 224])
tensor([173,   8, 154, 217,  44,  33,  68, 146,   3,  53,  37, 130, 175, 230,
         23, 233,  82, 146,  26, 138,  48, 147, 136, 154, 178, 157, 217, 181,
         33, 219, 108, 104])
