In [1]:
from torchvision import datasets


In [7]:
%%writefile preprocess.py

import os
import pandas as pd
import numpy as np

from tqdm import tqdm
from sklearn.preprocessing import LabelBinarizer

root_dir = '../input/caltech256/256_ObjectCategories'
# get all the folder paths
all_paths = os.listdir(root_dir)

# create a DataFrame
data = pd.DataFrame()

images = []
labels = []
counter = 0
for folder_path in tqdm(all_paths, total=len(all_paths)):
    # get all the image names in the particular folder
    image_paths = os.listdir(f"{root_dir}/{folder_path}")
    # get the folder as label
    label = folder_path.split('.')[-1]
    
    if label == 'clutter':
        continue

    # save image paths in the DataFrame
    for image_path in image_paths:
        if image_path.split('.')[-1] == 'jpg':
            data.loc[counter, 'image_path'] = f"{root_dir}/{folder_path}/{image_path}"
            labels.append(label)
            counter += 1

labels = np.array(labels)
# one-hot encode the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# add the image labels to the dataframe
for i in range(len(labels)):
    index = np.argmax(labels[i])
    data.loc[i, 'target'] = int(index)
    
# shuffle the dataset
data = data.sample(frac=1).reset_index(drop=True)

print(f"Number of labels or classes: {len(lb.classes_)}")
print(f"The first one hot encoded labels: {labels[0]}")
print(f"Mapping the first one hot encoded label to its category: {lb.classes_[0]}")
print(f"Total instances: {len(data)}")
 
# save as CSV file
data.to_csv('data.csv', index=False)
 
print(data.head(5))

Writing preprocess.py


In [8]:
%%writefile dataset.py

import albumentations
import numpy as np
import torch

from PIL import Image
from torch.utils.data import Dataset

# custom dataset
class ImageDataset(Dataset):
    def __init__(self, images, labels=None, tfms=None):
        self.X = images
        self.y = labels

        # apply augmentations
        if tfms == 0: # if validating
            self.aug = albumentations.Compose([
                albumentations.Resize(224, 224, always_apply=True),
            ])
        else: # if training
            self.aug = albumentations.Compose([
                albumentations.Resize(224, 224, always_apply=True),
                albumentations.HorizontalFlip(p=0.5),
                albumentations.ShiftScaleRotate(
                    shift_limit=0.3,
                    scale_limit=0.3,
                    rotate_limit=15,
                    p=0.5
                ),
            ])
         
    def __len__(self):
        return (len(self.X))
    
    def __getitem__(self, i):
        image = Image.open(self.X[i])
        image = image.convert('RGB')
        image = self.aug(image=np.array(image))['image']
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        label = self.y[i]
        return {
            'image': torch.tensor(image, dtype=torch.float), 
            'target': torch.tensor(label, dtype=torch.long)
        }

Writing dataset.py


In [9]:
%%writefile model.py

import pretrainedmodels
import torch.nn as nn
import torch.nn.functional as F

class ResNet50(nn.Module):
    def __init__(self, pretrained, requires_grad):
        super(ResNet50, self).__init__()
        if pretrained is True:
            self.model = pretrainedmodels.__dict__['resnet50'](pretrained='imagenet')
        else:
            self.model = pretrainedmodels.__dict__['resnet50'](pretrained=None)
            
        if requires_grad == True:
            for param in self.model.parameters():
                param.requires_grad = True
        elif requires_grad == False:
            for param in self.model.parameters():
                param.requires_grad = False
        
        self.l0 = nn.Linear(2048, 256)

    def forward(self, x):
        batch, _, _, _ = x.shape
        x = self.model.features(x)
        x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1)
        l0 = self.l0(x)
        return l0

model = ResNet50(pretrained=True, requires_grad=False)
# print(model)

Writing model.py


In [12]:
from model import model
from dataset import ImageDataset

In [18]:
import pandas as pd 
from torch.utils.data import DataLoader
import torch

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# get the dataset ready
df = pd.read_csv('data.csv')
X = df.image_path.values # image paths
y = df.target.values # targets

In [17]:
image_data = ImageDataset(X, y, tfms=0)

In [29]:
image_data[0]


{'image': tensor([[[167., 168., 170.,  ..., 181., 181., 182.],
          [168., 169., 169.,  ..., 177., 178., 178.],
          [169., 169., 169.,  ..., 179., 178., 178.],
          ...,
          [165., 165., 166.,  ..., 177., 177., 177.],
          [167., 166., 166.,  ..., 177., 177., 176.],
          [168., 167., 166.,  ..., 177., 177., 176.]],
 
         [[176., 177., 179.,  ..., 190., 191., 191.],
          [177., 178., 178.,  ..., 186., 187., 187.],
          [177., 178., 178.,  ..., 188., 188., 187.],
          ...,
          [174., 174., 175.,  ..., 187., 187., 187.],
          [176., 175., 175.,  ..., 186., 186., 185.],
          [177., 176., 175.,  ..., 186., 186., 185.]],
 
         [[173., 174., 176.,  ..., 187., 187., 188.],
          [174., 175., 175.,  ..., 184., 184., 184.],
          [175., 175., 175.,  ..., 185., 185., 184.],
          ...,
          [171., 171., 172.,  ..., 184., 184., 184.],
          [173., 172., 172.,  ..., 183., 183., 182.],
          [174., 173.,

In [27]:
batch_X, batch_y = next(image_iter)

In [28]:
batch_X


'image'

In [31]:
features = model(image_data[0]['image'].unsqueeze(0))

In [None]:
print(features)