In [213]:
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

## Specifications

In [214]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Querying device properties raises on machines without CUDA, so only do it
# when a GPU was actually selected above.
if device.type == "cuda":
    print(torch.cuda.get_device_properties(device))
else:
    print("CUDA is not available; running on CPU.")

_CudaDeviceProperties(name='NVIDIA GeForce RTX 2060', major=7, minor=5, total_memory=6143MB, multi_processor_count=30)


In [215]:
# Root folder of the dataset; the trailing slash is kept because later cells
# build paths by plain string concatenation with this value.
dataset_dir = "datasets/signDatabasePublicFramesOnly/"

# Load the annotation table that sits directly inside the dataset folder.
output = pd.read_csv(os.path.join(dataset_dir, "allAnnotations.csv"))

## Custom Dataset and Dataloader

In [216]:
def preprocess(anImg, aBoxes, target_size=416):
    """Resize an image to a square and convert its boxes to normalized centre format.

    Args:
        anImg: Image-like object exposing ``.size`` as ``(width, height)`` and
            ``.resize((width, height))`` (for example a PIL image).
        aBoxes: Sequence (list or tensor) of rows
            ``[class_label, x1, y1, x2, y2]`` where the corner coordinates are
            given in pixels of the original image.
        target_size: Side length, in pixels, of the resized square image.

    Returns:
        A tuple ``(resized_image, boxes)``. ``boxes`` is a float tensor of
        shape ``(N, 5)`` with rows ``[class_label, cx, cy, width, height]``;
        the four box values are expressed as fractions of the resized image
        side (so they lie in [0, 1] for boxes inside the image).
    """
    theImageWidth, theImageHeight = anImg.size
    scale_x, scale_y = target_size / theImageWidth, target_size / theImageHeight

    theRows = []
    for theBox in aBoxes:
        # Convert to plain floats so list rows and tensor rows behave the same.
        class_label, x1, y1, x2, y2 = (float(theValue) for theValue in theBox)

        # Corner coordinates in the resized image.
        x1, x2 = x1 * scale_x, x2 * scale_x
        y1, y2 = y1 * scale_y, y2 * scale_y

        theRows.append([
            class_label,
            (x1 + x2) / 2 / target_size,
            (y1 + y2) / 2 / target_size,
            # Parentheses matter: the whole extent is normalized, not just
            # the first corner.
            (x2 - x1) / target_size,
            (y2 - y1) / target_size,
        ])

    # reshape keeps the (N, 5) layout even when there are no boxes at all.
    theBoxes = torch.tensor(theRows).reshape(-1, 5)
    return anImg.resize((target_size, target_size)), theBoxes

    

In [217]:
class LISA(Dataset):
    """Single-box-per-row traffic sign dataset producing grid-encoded targets.

    This custom dataset was made using the data provided by the CVRR Laboratory
    at UCSD. More information can be found at https://cvrr.ucsd.edu/home

    The annotation file is read with the default pandas parser, and only its
    first column is used: each value is split on ``;`` into an image path, a
    class name, four integer box coordinates and one trailing field that is
    ignored.

    Each item is ``(image, target)`` where ``target`` has shape
    ``(S, S, C + 5 * B)``. For the grid cell containing a box centre:
    ``target[i, j, :C]`` is the one-hot class, ``target[i, j, C]`` is the
    objectness flag and ``target[i, j, C + 1:C + 5]`` holds
    ``[x_cell, y_cell, width_cell, height_cell]``. Remaining slots stay zero.

    Args:
        csv_file: Path of the annotation CSV.
        root_dir: Directory that the image paths in the CSV are relative to.
        S: Number of grid cells per image side.
        B: Number of box slots per cell (only the first one is filled here).
        C: Number of classes.
        transform: Optional callable ``(image, boxes) -> (image, boxes)``.
            The target encoding expects the returned boxes to be a tensor of
            rows ``[class_label, cx, cy, width, height]`` with values
            normalized to [0, 1].
    """

    def __init__(self, csv_file, root_dir, S=7, B=2, C=47, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        # Map every distinct class name (second ';'-separated field) to an
        # integer id, in order of first appearance.
        theClassNames = self.annotations.iloc[:, 0].str.split(";").str[1].unique()
        self.labels = {class_name: idx for idx, class_name in enumerate(theClassNames)}
        self.S = S
        self.B = B
        self.C = C

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load the image and build the grid target for annotation ``index``.

        Raises:
            ValueError: If a box centre handed to the encoder lies outside
                [0, 1] (for example when no normalizing transform is set).
        """
        theImgPath, *theBbox, _ = self.annotations.iloc[index, 0].split(";")
        theBbox[0] = self.labels[theBbox[0]]  # Class name -> integer id
        # Four integer pixel coordinates, forwarded unchanged to the transform.
        # NOTE(review): the notebook's preprocess reads them as x1, y1, x2, y2
        # (upper-left corner, then lower-right corner) - confirm against the
        # CSV header.
        theBbox[1:] = list(map(int, theBbox[1:]))

        theImg = Image.open(os.path.join(self.root_dir, theImgPath))

        theBoxes = torch.tensor([theBbox])
        if self.transform:
            theImg, theBoxes = self.transform(theImg, theBoxes)

        return theImg, self._encode_boxes(theBoxes)

    def _encode_boxes(self, theBoxes):
        """Encode normalized ``[class, cx, cy, w, h]`` rows into the S x S grid target."""
        theLabels = torch.zeros((self.S, self.S, self.C + 5 * self.B))

        for theBox in theBoxes:
            class_label, x, y, width, height = theBox.tolist()
            class_label = int(class_label)

            if not (0.0 <= x <= 1.0 and 0.0 <= y <= 1.0):
                raise ValueError(
                    f"Box centre ({x}, {y}) is outside [0, 1]; "
                    "boxes must be normalized before grid encoding."
                )

            # i, j are the cell row and cell column. A centre lying exactly on
            # the right/bottom edge (value 1.0) belongs to the last cell.
            i = min(int(self.S * y), self.S - 1)
            j = min(int(self.S * x), self.S - 1)
            x_cell, y_cell = self.S * x - j, self.S * y - i

            # Width and height relative to one cell: a cell spans 1/S of the
            # image, so a normalized extent w covers w * S cells.
            width_cell, height_cell = width * self.S, height * self.S

            # Only the first object landing in a cell is kept, which restricts
            # the target to ONE object per cell.
            if theLabels[i, j, self.C] == 0:
                # Objectness flag sits right after the C class scores.
                theLabels[i, j, self.C] = 1

                # Box coordinates follow the objectness flag.
                theLabels[i, j, self.C + 1:self.C + 5] = torch.tensor(
                    [x_cell, y_cell, width_cell, height_cell]
                )

                # One-hot encoding for class_label.
                theLabels[i, j, class_label] = 1

        return theLabels
            


In [220]:
# Build the dataset from the annotation file inside the dataset folder, with
# preprocess applied to every (image, boxes) pair.
theDataset = LISA(
    os.path.join(dataset_dir, "allAnnotations.csv"),
    dataset_dir,
    transform=preprocess,
)

# Smoke check: fetch the second sample (index 1).
theImg, theLabels = theDataset[1]