In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data.dataset as td
import matplotlib.pyplot as plt
from torchvision import transforms
from torchsummary import summary
import numpy as np
import cv2
import os
import math

In [2]:
class CompCarDataset(td.Dataset):
    """CompCars samples: a transformed image, a 24-slot label vector and the raw speed.

    Three parallel directories are read from ``comp-cars/<split>/``: ``images``,
    ``speed`` and ``type``. Samples are matched purely by position after sorting
    each directory listing, so the i-th image, speed file and type file must
    belong to the same sample.

    Args:
        train: Truthy selects the ``train`` split, falsy the ``test`` split.
    """

    NUM_LABELS = 24      # length of the label vector
    SPEED_OFFSET = 12    # first index of the speed half of the label vector
    SPEED_MIN = 100      # speed value that maps to the first speed slot
    SPEED_BIN_WIDTH = 20 # width of one speed slot

    def __init__(self, train):
        split = "train" if train else "test"

        # NOTE(review): cv2.imread yields channels in BGR order; these statistics
        # must be in that same order to be applied correctly — confirm.
        mean = [0.4695975, 0.47956536, 0.49133955]  # Calculated using the training set
        std = [0.07418917, 0.0721986, 0.07223192]  # Calculated using the training set

        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
            # Pads 121 px on top and bottom (none left/right). Because this runs
            # after Normalize, the zero fill corresponds to the dataset mean.
            transforms.Pad((0, 121)),
            transforms.Resize((224, 224))
        ])

        self.path_image = "comp-cars/" + split + "/images"
        self.path_speed = "comp-cars/" + split + "/speed"
        self.path_type = "comp-cars/" + split + "/type"

        # Sorted listings are what keep the three directories index-aligned.
        self.images = sorted(os.listdir(self.path_image))
        self.labels_speed = sorted(os.listdir(self.path_speed))
        self.labels_type = sorted(os.listdir(self.path_type))

    def __len__(self):
        """Return the number of image files in the split."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            A tuple ``(img, lbl, spd)``: the transformed image, a float tensor of
            length 24 with a 1 at index ``type - 1`` and a 1 at index
            ``12 + floor((speed - 100) / 20)``, and the speed as a float.

        Raises:
            OSError: If a label file cannot be opened or the image cannot be read.
            ValueError: If a label file does not contain a parseable number.
        """
        image_path = os.path.join(self.path_image, self.images[idx])
        speed_path = os.path.join(self.path_speed, self.labels_speed[idx])
        type_path = os.path.join(self.path_type, self.labels_type[idx])

        # Context managers guarantee the handles are released for every sample.
        with open(speed_path, "r") as speed_file:
            spd = float(speed_file.read())
        with open(type_path, "r") as type_file:
            typ = int(type_file.read())

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(image_path)
        if image is None:
            raise OSError(f"could not read image: {image_path}")
        img = self.transform(image)

        # NOTE(review): assumes 1 <= typ <= 12 and 100 <= spd < 340. Other values
        # either index outside the intended half of the vector or raise
        # IndexError — TODO confirm against the data and validate if needed.
        lbl = torch.zeros(self.NUM_LABELS)
        lbl[typ - 1] = 1
        speed_slot = math.floor((spd - self.SPEED_MIN) / self.SPEED_BIN_WIDTH)
        lbl[self.SPEED_OFFSET + speed_slot] = 1
        return img, lbl, spd


In [29]:
class BasicBlockResNet(nn.Module):
    """Residual basic block: two 3x3 conv + batch-norm layers with a skip connection.

    The main path keeps the spatial size (stride 1, padding 1). The shortcut is
    the identity, or a 1x1 conv + batch-norm projection when the channel count
    changes.

    Args:
        channels_in: Number of input channels.
        channels_out: Number of output channels.
        size_change: If truthy, route the shortcut through the 1x1 projection.
    """

    def __init__(self, channels_in, channels_out, size_change):
        super().__init__()

        self.size_change = size_change

        self.activation = nn.ReLU()

        self.layers = nn.Sequential(
            nn.Conv2d(channels_in, channels_out, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(channels_out),
            nn.ReLU(),
            nn.Conv2d(channels_out, channels_out, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(channels_out)
        )

        # Built unconditionally so the parameter set (and therefore state_dict
        # keys) is the same for every block, whether or not the projection is used.
        self.resizing = nn.Sequential(
            nn.Conv2d(channels_in, channels_out, kernel_size=(1, 1), stride=(1, 1)),
            nn.BatchNorm2d(channels_out)
        )

    def forward(self, x):
        """Return ``relu(layers(x) + shortcut(x))``."""
        out = self.layers(x)

        # Project the shortcut when asked to, and also whenever the channel
        # counts differ, since the identity could not be added in that case.
        if self.size_change or x.shape[1] != out.shape[1]:
            residual = self.resizing(x)
        else:
            residual = x

        # Out-of-place add: never mutates the caller's tensor or a tensor that
        # autograd may still need.
        out = out + residual
        return self.activation(out)

In [22]:
#Final Tensor needs to be of Length 24 (12 car types, and 12 speed zones)
class ResNet18(nn.Module):
    """ResNet-18 style classifier: a stem, eight basic blocks and a linear head.

    Args:
        channels_in: Number of channels in the input image.
        sizes: Channel widths of the four stages; the first four entries are used.
        num_classes: Length of the output vector (defaults to 24).
    """

    # The default is a tuple so that it cannot be mutated and shared between instances.
    def __init__(self, channels_in, sizes=(64, 128, 256, 512), num_classes=24):
        super().__init__()

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max-pool.
        self.first_layer = nn.Sequential(
            nn.Conv2d(channels_in, sizes[0], kernel_size=(7, 7), stride=(2, 2), padding=3),
            nn.BatchNorm2d(sizes[0]),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=1)
        )

        # Two blocks per stage. The first block of every stage after the first
        # changes the channel width and therefore asks for the projection shortcut.
        stage_widths = [sizes[0], sizes[1], sizes[2], sizes[3]]
        blocks = []
        previous_width = stage_widths[0]
        for stage_index, width in enumerate(stage_widths):
            blocks.append(BasicBlockResNet(previous_width, width, stage_index > 0))
            blocks.append(BasicBlockResNet(width, width, False))
            previous_width = width
        self.middle_layers = nn.Sequential(*blocks)

        # Head: global average pool, flatten, then one linear layer.
        self.final_layer = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(sizes[3], num_classes)
        )

    def forward(self, x):
        """Run stem, blocks and head; returns one ``num_classes``-long row per input."""
        x = self.first_layer(x)
        x = self.middle_layers(x)
        x = self.final_layer(x)
        return x

In [30]:
# Smoke test: push the first training sample through a freshly initialised network.
network = ResNet18(3)
dataset = CompCarDataset(True)
img, lbl, spd = dataset[0]
img = torch.unsqueeze(img, 0)  # add the batch dimension the conv layers expect
# Call the module itself rather than .forward() so registered hooks are run.
final_lbl = network(img)
print(final_lbl)

tensor([[ 0.4027, -0.0626,  0.1105, -0.1496, -0.1349,  0.1687, -0.0652, -0.2776,
         -0.6095,  0.1788,  0.1973, -0.1409, -0.0910,  0.1431, -0.5600,  0.1399,
         -0.2460,  0.1909,  0.0470,  0.3566, -0.2220,  0.3793,  0.3402, -0.1609]],
       grad_fn=<AddmmBackward>)


In [27]:
# Print the module tree to inspect each layer's hyper-parameters.
# NOTE(review): the saved output below shows padding=(3, 3) on the 3x3 convs while
# the class cell now uses padding=(1, 1), and the execution counts are out of
# order — re-run this cell after the class definitions to refresh it.
print(network)

ResNet18(
  (first_layer): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=1, dilation=1, ceil_mode=False)
  )
  (middle_layers): Sequential(
    (0): BasicBlockResNet(
      (activation): ReLU()
      (layers): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3))
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (resizing): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )

In [31]:
# torchsummary table of per-layer output shapes and parameter counts for a
# single 3x224x224 input.
summary(network, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,472
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,928
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,928
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
 BasicBlockResNet-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,928
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,