In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision
from torchvision.transforms import v2
from torchvision.datasets import VOCDetection
import src.backbones.vgg as vgg
import src.models.tinyssd as tinyssd
import src.models.helpers as helpers
import src.utils.loss as loss

from src.models.helpers import multibox_prior, match_anchors_to_gt
from src.utils.loss import TinySSDLoss

import importlib

In [20]:
importlib.reload(vgg)
importlib.reload(tinyssd)
importlib.reload(helpers) 
importlib.reload(loss)


<module 'src.utils.loss' from 'c:\\Users\\rooty\\OU Research\\RefNet\\RefNet\\RefNet\\src\\utils\\loss.py'>

In [3]:
transforms = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.RandomResizedCrop(size=(224, 224), antialias=True),
    v2.RandomHorizontalFlip(p=0.5),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

trainset = VOCDetection('./datasets/VOC', image_set='train', download=True, year='2012', transforms=transforms)
valset = VOCDetection('./datasets/VOC', image_set='val', download=True, year='2012', transforms=transforms)

Using downloaded and verified file: ./datasets/VOC\VOCtrainval_11-May-2012.tar
Extracting ./datasets/VOC\VOCtrainval_11-May-2012.tar to ./datasets/VOC
Using downloaded and verified file: ./datasets/VOC\VOCtrainval_11-May-2012.tar
Extracting ./datasets/VOC\VOCtrainval_11-May-2012.tar to ./datasets/VOC


In [2]:
tinySSD = tinyssd.TinySSD(num_classes=20, imgsz=(224,224), sizes=[[.2,.4, .3], [.6, .5,.7,.8]], ratios=[[1,0.5,2],[1,0.5,2]], device='cuda')


Loaded Pretrianed weights for VGG16


In [8]:
x = torch.zeros((1,3,224,224)).to('cuda')
y = vgg.VGG16(True).to('cuda')(x)
[y.shape for y in y]


Loaded Pretrianed weights for VGG16


[torch.Size([1, 512, 28, 28]), torch.Size([1, 1024, 14, 14])]

In [3]:
x = torch.zeros((1,3,224,224)).to('cuda')
y = tinySSD(x)

In [14]:
[a.shape for a in tinySSD.anchors]

[torch.Size([1, 5488, 4]), torch.Size([1, 1568, 4])]

In [4]:
def collate_fn(batch):
    """
    Since each image may have a different number of objects, we need a collate function (to be passed to the DataLoader).
    This describes how to combine these tensors of different sizes. We use lists.
    Note: this need not be defined in this Class, can be standalone.
    :param batch: an iterable of N sets from __getitem__()
    :return: a tensor of images, lists of varying-size tensors of bounding boxes, labels, and difficulties
    """
    images = []
    targets = []

    for img, target in batch:
        images.append(img)
    
        boxes = [[int(obj['bndbox']['xmin']), int(obj['bndbox']['ymin']), int(obj['bndbox']['xmax']), int(obj['bndbox']['ymax'])] for obj in target['annotation']['object']]
        boxes = torch.tensor(boxes)
        
        cls = torch.as_tensor([1] * len(target['annotation']['object']), dtype=torch.int64)  # Assuming all objects are of class 1
        label = torch.cat([cls.view(-1, 1), boxes], dim=1)
        
        targets.append(label)

    images = torch.stack(images, dim=0)
    return images, targets

In [1]:
NUM_EPOCHS = 100
OPTIMIZER = torch.optim.SGD(tinySSD.parameters(), lr=0.001, weight_decay=5e-4) 
CRITERION = TinySSDLoss()
DEVICE = 'cuda'
DATALOADER = DataLoader(trainset, batch_size=2, drop_last=True, shuffle=True, collate_fn=collate_fn)

NameError: name 'torch' is not defined

In [54]:
next(iter(DATALOADER))

(tensor([[[[ 0.5734,  0.5296,  0.9763,  ...,  1.8039,  1.8251,  1.8305],
           [ 0.6339,  0.6738,  0.9052,  ...,  1.8386,  1.8193,  1.8071],
           [ 0.6040,  0.7154,  1.0721,  ...,  1.8435,  1.8252,  1.8150],
           ...,
           [-0.6243, -0.6888, -0.6263,  ..., -0.6318, -0.7024, -0.4625],
           [-0.4738, -0.3907, -0.5397,  ..., -0.6939, -0.7700, -0.8206],
           [-0.5558, -0.4248, -0.4982,  ..., -1.0553, -0.6425, -0.6018]],
 
          [[ 0.0421,  0.0450,  0.3082,  ...,  2.0581,  2.0755,  2.1026],
           [ 0.0238,  0.1086,  0.3522,  ...,  2.0386,  2.0462,  2.0516],
           [ 0.0940,  0.1292,  0.3932,  ...,  2.0831,  2.0541,  2.0385],
           ...,
           [-0.5554, -0.5790, -0.4002,  ..., -0.4891, -0.5656, -0.5001],
           [-0.7069, -0.5699, -0.6027,  ..., -0.6230, -0.6308, -0.6523],
           [-0.5416, -0.4716, -0.4839,  ..., -0.8163, -0.5814, -0.4862]],
 
          [[-0.2386, -0.3848, -0.0206,  ...,  2.3760,  2.3967,  2.4201],
           [-

In [None]:
for epoch in range(NUM_EPOCHS):
    tinySSD.to(DEVICE)
    tinySSD.train()

    for i, (imgs, targets) in enumerate(DATALOADER):
        imgs, targets = imgs.to(DEVICE), targets.to(DEVICE)
        outputs = tinySSD(imgs)
        targets = tinySSD.match(targets) # update targets with matched anchors
        loss = CRITERION(outputs, targets)
        OPTIMIZER.zero_grad()
        loss.backward()
        OPTIMIZER.step()
        print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Step [{i+1}/{len(DATALOADER)}], Loss: {loss.item():.4f}')
    

In [18]:
gt_boxes = torch.tensor([[[10,20,30,40],[5,10,20,30],[100,100,150,150],[200,200,222,222]]]).to('cuda')
# batch size = 1
# num gt = 4
# num anchors = 5488
anchors = tinySSD.anchors

ious = helpers.calculate_ious(anchors[0] * 224, gt_boxes)
# best_ious, idxs = torch.max(ious, dim=-1)
# valid_match_indices = best_ious > 0.5
# print(valid_match_indices)


In [19]:
(ious)

tensor([[[0.0456, 0.0498, 0.0771,  ..., 0.0000, 0.0000, 0.0000],
         [0.1194, 0.0374, 0.0664,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.1870, 0.0603, 0.0603]]],
       device='cuda:0')

In [19]:
anchors[0].shape

torch.Size([5488, 4])

In [18]:
anchors:torch.Tensor = anchors[0].to('cuda')
anchors.shape

torch.Size([5488, 4])

In [31]:
gt_boxes = torch.tensor([[[10,20,30,40],[100,100,150,150],[200,200,250,250]], [[10,20,30,40],[100,100,150,150],[200,200,250,250]]]).to('cuda')
anchors = tinySSD.anchors
anchors:torch.Tensor = anchors[0].to('cuda')

bs = gt_boxes.shape[0]
num_gt = gt_boxes.shape[1]
num_a = anchors.shape[0]

print(f'bs = {bs}')
print(f'num_gt = {num_gt}')
print(f'num_a = {num_a}')

area_gt_boxes = ((gt_boxes[:, :, 2] - gt_boxes[:, :, 0]) * (gt_boxes[:, :, 3] - gt_boxes[:, :, 1])).view(bs, num_gt, 1).expand(-1, -1, num_a)
area_anchors = ((anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])).expand(num_gt, -1).expand(bs, -1, -1)



# gt_boxes = gt_boxes.view(bs, num_gt, 1, 4).expand(-1, -1, num_a, -1) # shape is (batch_size, num_gt_boxes, num_anchors, 4)
# anchors = anchors.expand(num_gt, -1, -1).expand(bs, -1, -1, -1) # shape is (batch_size, num_gt_boxes, num_anchors, 4)

print(area_gt_boxes.shape)
print(area_anchors.shape)

bs = 2
num_gt = 3
num_a = 5488
torch.Size([2, 3, 5488])
torch.Size([2, 3, 5488])


In [23]:
inter_x1 = torch.max(anchors[:, :, :, 0], gt_boxes[:, :, :, 0]) # shape is (batch_size, num_gt_boxes, num_anchors)
inter_y1 = torch.max(anchors[:, :, :, 1], gt_boxes[:, :, :, 1]) # shape is (batch_size, num_gt_boxes, num_anchors)
inter_x2 = torch.min(anchors[:, :, :, 2], gt_boxes[:, :, :, 2]) # shape is (batch_size, num_gt_boxes, num_anchors)   
inter_y2 = torch.min(anchors[:, :, :, 3], gt_boxes[:, :, :, 3]) # shape is (batch_size, num_gt_boxes, num_anchors)

torch.Size([2, 3, 5488])

In [25]:
inter_width = torch.max(torch.tensor(0.0), inter_x2 - inter_x1) # shape is (batch_size, num_gt_boxes, num_anchors, 1)
inter_height = torch.max(torch.tensor(0.0), inter_y2 - inter_y1) # shape is (batch_size, num_gt_boxes, num_anchors, 1)
inter_area = inter_width * inter_height

In [26]:
inter_area.shape

torch.Size([2, 3, 5488])