In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
from PIL import Image
from tempfile import TemporaryDirectory

cudnn.benchmark = True
plt.ion()   # interactive mode

<matplotlib.pyplot._IonContext at 0x771ebd4ff400>

In [14]:
import torch
import os

from torchvision.io import read_image
from torchvision.ops.boxes import masks_to_boxes
from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F
from torchvision.transforms import v2 as T

class DrivingDataset(torch.utils.data.Dataset):
    classes = ["Background", "Drivable", "Wall", "Obstacle"]
    
    def __init__(self, image_dir, mask_dir, transforms=None):
        # Collect image file names to sorted list
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.image_file_list = sorted(os.listdir(image_dir))
        self.mask_file_list = sorted(os.listdir(mask_dir))
        self.transforms = transforms
        
    def __len__(self):
        return len(self.image_file_list)
    
    def __getitem__(self, idx):
        # First we need to collect the image associated with the index
        img = read_image(os.path.join(self.image_dir, self.image_file_list[idx]))
        
        # Next we need to get the associated mask png
        mask = read_image(os.path.join(self.mask_dir, self.mask_file_list[idx]))
        
        if self.transforms:
            img = self.transforms(img)
            mask = self.transforms(mask)
        
        # Convert the pixels to single representative identifiers. 
#         for h in range(mask.shape[1]):
#             for w in range(mask.shape[2]):
#                 if (mask[:, h, w] == [128, 0, 0]):
#                     # Drivable
#                     mask[0, h, w] = 1
#                 elif (mask[:, h, w] == [0, 128, 0]):
#                     # Wall
#                     mask[0, h, w] = 2
#                 elif (mask[:, h, w] == [128, 128, 0]):
#                     # Obstacle
#                     mask[0, h, w] = 3
#                 else:
#                     # Background
#                     mask[0, h, w] = 0
        print(1)
        mask[:3, :, :][mask[:3, :, :] == [128, 0, 0]] = 1
        mask[:3, :, :][mask[:3, :, :] == [0, 128, 0]] = 2
        mask[:3, :, :][mask[:3, :, :] == [128, 128, 0]] = 3
        print(2)
    
        # Convert to a single dimention for pixel i.e. [1, H, W]
        mask = mask[0:1]

        # Collect all the different unique elements labled in the mask. (In this set each person)
        obj_ids = torch.unique(mask)

        # Remove the first element as it is background labeling
        obj_ids = obj_ids[1:]
        num_objs = len(obj_ids)

        # Making binary mask of objects found.
        # [:, None, None] is needed to reshape the obj_ids tensor which is a 1D tensor containing the different colors
        # So that an element wise comparison against the pixels in the mask can be done. given Trues where pixels
        # part of an object and false elsewhere. Then translating that to 1 and 0s instead of booleans.
        masks = (mask == obj_ids[:, None, None]).to(dtype=torch.uint8)

        # get boxes of that bound the objects
        boxes = masks_to_boxes(masks)
            
        image_id = idx
        
        # Boxes are in this format:
        # Column 0: x-coordinate of the top-left corner
        # Column 1: y-coordinate of the top-left corner
        # Column 2: x-coordinate of the bottom-right corner
        # Column 3: y-coordinate of the bottom-right corner
        area = (boxes[:,3] - boxes[:,1]) * (boxes[:,2] - boxes[:,0])
        
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        
        # Make image a TVTensor
        img = tv_tensors.Image(img)
        
        target = {}
        target["boxes"] = tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size = F.get_size(img))
        target["masks"] = tv_tensors.Mask(masks)
        target["labels"] = obj_ids.to(dtype=torch.int64)
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        return img, target

In [15]:
# Here we will show the data augmentations and normalizations that will improve training
# On validation data we will only do normalizations.
# We need to pull the datasets as well and make the dataloaders based on the pre-made datasets

# Dict that will have the transforms for training data alongside validation.
data_transforms = {
    'train': T.Compose([
        T.ToDtype(torch.float, scale=True),
        T.Resize(800, max_size=1333),
        # Horizontally flip the given image randomly with a given probability. Default is .5
        T.RandomHorizontalFlip(),
        # Makes output a tensor
        T.ToPureTensor(),
        # First array is Mean subtraction so it  subtracts the mean value ([0.485, 0.456, 0.406]) from each
        # color channel (red, green, blue) of the image. This shifts the pixel values closer to zero.
        # Second array is division by standard deviation it divides each color by the values here. 
        # This will scale the pixel values to have a similar range across different channels
        # These numbers were chosen specifically because these are the calculated average mean
        # and standard deviation
        # across the entire ImageNet dataset.
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': T.Compose([
        T.ToDtype(torch.float, scale=True),
        T.Resize(800, max_size=1333),
        T.ToPureTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

image_dir = 'Data/SelfDrivingRC/Images'
mask_dir = 'Data/SelfDrivingRC/SegmentationClass'
# Dict of train data and val data, keyed by 'train' & 'val' respectfully
image_datasets = {x: DrivingDataset(os.path.join(image_dir, x),
                                    os.path.join(mask_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [16]:
print(class_names)

['Background', 'Drivable', 'Wall', 'Obstacle']


In [17]:
def imshow(inp, title=None):
    """Display image for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


# Get a batch of training data
inputs, target = next(iter(dataloaders['train']))

imshow(inputs)

# Make a grid from batch
# out = torchvision.utils.make_grid(inputs)

# imshow(out)

# # Make a grid from batch
# out = torchvision.utils.make_grid(target['masks'])
# imshow(out, title=[y for y in [class_names[x]] for x in target['masks']])

# imshow(out)

RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/blake/.local/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/home/blake/.local/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/blake/.local/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_35835/2566144181.py", line 36, in __getitem__
    mask.to(device)
  File "/home/blake/.local/lib/python3.10/site-packages/torch/cuda/__init__.py", line 279, in _lazy_init
    raise RuntimeError(
RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
