In [20]:
import os
from functools import partial

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Torch
import torch
# import torchinfo
import torchvision
from torch import nn
from torchvision.transforms._presets import ImageClassification
from torch.utils.data import Dataset, DataLoader

# ML-related
from scipy.io import loadmat
import matplotlib.pyplot as plt

# Default Python
import random
from pathlib import Path

# Other Libraries
from PIL import Image
from tqdm import tqdm

In [25]:
from torchvision.transforms import functional as F
from torchvision.transforms.functional import InterpolationMode
from typing import Tuple, Optional, Union

def img_classification_transform(
    img,
    crop_size: int = 224,
    resize_size: int = 256,
    mean: Tuple[float, ...] = (0.485, 0.456, 0.406),
    std: Tuple[float, ...] = (0.229, 0.224, 0.225),
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    antialias: Optional[Union[str, bool]] = "warn"
):
    """Resize, center-crop, tensorize and normalize one image.

    Functional (stateless) preprocessing pipeline, usable directly as a
    dataset ``transforms`` callable.

    Args:
        img: Input image; either a ``torch.Tensor`` or an object accepted by
            ``F.pil_to_tensor`` (e.g. a PIL image).
        crop_size: Size passed to ``F.center_crop``.
        resize_size: Size passed to ``F.resize``.
        mean: Per-channel means passed to ``F.normalize``.
        std: Per-channel standard deviations passed to ``F.normalize``.
        interpolation: Interpolation mode forwarded to ``F.resize``.
        antialias: Antialias setting forwarded to ``F.resize``.

    Returns:
        The normalized ``torch.float`` image tensor.
    """
    resized = F.resize(img, resize_size, interpolation=interpolation, antialias=antialias)
    cropped = F.center_crop(resized, crop_size)

    # Tensors pass straight through; anything else is converted first.
    tensor_img = cropped if isinstance(cropped, torch.Tensor) else F.pil_to_tensor(cropped)

    float_img = F.convert_image_dtype(tensor_img, torch.float)
    return F.normalize(float_img, mean=mean, std=std)

In [26]:
class StanfordCarsCustomDataset(Dataset):
    """Stanford Cars images paired with zero-based integer class labels.

    Images are read from
    ``{data_dir}/stanford-cars-dataset/cars_{stage}/cars_{stage}`` and labels
    from the ``.mat`` annotation files under
    ``{data_dir}/stanford-cars-dataset-meta``.

    Args:
        data_dir: Root directory containing the dataset folders.
        stage: Which split to serve; ``'train'`` or ``'test'``.
        transforms: Callable applied to each RGB PIL image before it is
            returned.

    Raises:
        ValueError: If ``stage`` is neither ``'train'`` nor ``'test'``.
    """

    _STAGES = ('train', 'test')

    def __init__(self, data_dir='../data/', stage='train', transforms=img_classification_transform):
        super().__init__()

        # Fail fast: an unknown stage would otherwise leave `image_label_dict`
        # undefined and only surface later as an AttributeError.
        if stage not in self._STAGES:
            raise ValueError(f"stage must be one of {self._STAGES}, got {stage!r}")

        # transforms
        self.transforms = transforms

        # annotations (both splits are kept as attributes, as before)
        self.training_image_label_dictionary = self._load_labels(
            f'{data_dir}/stanford-cars-dataset-meta/devkit/cars_train_annos.mat'
        )
        self.testing_image_label_dictionary = self._load_labels(
            f'{data_dir}/stanford-cars-dataset-meta/cars_test_annos_withlabels (1).mat'
        )

        if stage == 'train':
            self.image_label_dict = self.training_image_label_dictionary
        else:
            self.image_label_dict = self.testing_image_label_dictionary

        # images
        self.directory = f'{data_dir}/stanford-cars-dataset/cars_{stage}/cars_{stage}'
        # Sorted so that index -> image is reproducible (os.listdir order is
        # arbitrary). Entries without an annotation (stray files, checkpoint
        # folders, ...) are skipped so __getitem__ cannot hit a KeyError
        # midway through an epoch.
        self.images = sorted(
            os.path.join(self.directory, file_name)
            for file_name in os.listdir(self.directory)
            if file_name in self.image_label_dict
        )

    @staticmethod
    def _load_labels(mat_path):
        """Return a ``{file name: zero-based label}`` dict parsed from one annotation file."""
        labels = {}
        for arr in loadmat(mat_path)['annotations'][0]:
            # By the indexing used here, the last field holds the file name and
            # the second-to-last the class id; the `- 1` makes labels zero-based.
            # Cast to a Python int *before* subtracting: the arithmetic then
            # cannot wrap if the stored dtype is unsigned, and batched labels
            # collate to an integer tensor.
            # NOTE(review): assumes class ids in the .mat start at 1 - confirm.
            labels[str(arr[-1][0])] = int(arr[-2][0][0]) - 1
        return labels

    def __len__(self):
        """Number of annotated images found in the stage directory."""
        return len(self.images)

    def __getitem__(self, index):
        """Return ``(transformed image, label)`` for the image at ``index``."""
        # Get image
        image = self.images[index]
        img_pil = Image.open(image).convert('RGB')
        img_trans = self.transforms(img_pil)

        # Look the label up by bare file name; basename handles whichever
        # separator os.path.join produced on this platform.
        image_stem = os.path.basename(image)
        img_label = self.image_label_dict[image_stem]

        return img_trans, img_label

In [29]:
# Smoke test: build the training split and wrap it in a shuffling
# DataLoader that yields one sample per batch.
trains_ds = StanfordCarsCustomDataset(stage='train')
train_dl = DataLoader(trains_ds, batch_size=1, shuffle=True)

# Pull a single batch to confirm that loading and transforms run end to end.
batch = next(iter(train_dl))

In [30]:
# batch[0] is the image part of the (image, label) pair returned by the dataset.
print(batch[0].shape)

torch.Size([1, 3, 224, 224])


In [36]:
import os

# Number of files removed by this cell.
counter = 0

data_dir = '../data/stanford-cars-dataset/cars_train/cars_train/'

# Delete every regular file in the training image folder that is not a .jpg.
# WARNING: destructive - this permanently removes files from disk.
for item in os.listdir(data_dir):
    item_path = os.path.join(data_dir, item)
    # Keep images, and leave sub-directories alone: os.remove() raises on a
    # directory.
    if item.endswith('.jpg') or not os.path.isfile(item_path):
        continue
    os.remove(item_path)
    counter += 1

print(counter)

0
