# Finding out the output of 'match' function

In [1]:
from data import *
from utils.augmentations import SSDAugmentation
import torch
from torch.autograd import Variable
from box_utils import match, point_form, jaccard, match, intersect, encode
from box_utils import log_sum_exp
from SSD_generate_anchors import generate_ssd_priors

import collections
from pathlib import Path
from torch.utils.data import DataLoader
import numpy as np

In [2]:
root = Path.home()/'Documents'/'DATASETS'/'VOCdevkit'
cfg = voc

In [3]:
dataset = VOCDetection(root = root,
                      transform=SSDAugmentation(cfg['min_dim'], MEANS))

In [4]:
batch_size = 7

data_loader = DataLoader(dataset, batch_size,
                          num_workers=2,
                          shuffle=True,
                          collate_fn=detection_collate,
                          pin_memory=True)
batch_iterator = iter(data_loader)

In [5]:
SSDBoxSizes = collections.namedtuple('SSDBoxSizes', ['min', 'max'])

Spec = collections.namedtuple('Spec', ['feature_map_size', 'shrinkage', 'box_sizes', 
                                       'aspect_ratios'])

specs = [
    Spec(38, 8, SSDBoxSizes(30, 60), [2]),
    Spec(19, 16, SSDBoxSizes(60, 111), [2, 3]),
    Spec(10, 32, SSDBoxSizes(111, 162), [2, 3]),
    Spec(5, 64, SSDBoxSizes(162, 213), [2, 3]),
    Spec(3, 100, SSDBoxSizes(213, 264), [2]),
    Spec(1, 300, SSDBoxSizes(264, 315), [2])
]

priors = generate_ssd_priors(specs, clip=True)

In [6]:
priors = torch.Tensor(priors)
# priors = point_form(priors)

In [7]:
priors

tensor([[0.0133, 0.0133, 0.1000, 0.1000],
        [0.0133, 0.0133, 0.1414, 0.1414],
        [0.0133, 0.0133, 0.1414, 0.0707],
        ...,
        [0.5000, 0.5000, 0.9612, 0.9612],
        [0.5000, 0.5000, 1.0000, 0.6223],
        [0.5000, 0.5000, 0.6223, 1.0000]])

In [8]:
for idx, (image, target) in enumerate(batch_iterator):
#     print(image.size(0))
    bs    = image.size(0)
    loc_t = torch.Tensor(batch_size, 8732, 4)
    conf_t = torch.LongTensor(batch_size, 8732)
    
    for i in range(bs):
        truths = target[i][:,:-1].data
        labels = target[i][:,-1].data
        defaults = priors.data

        match(0.6, truths, defaults, [0.1,0.2], labels,
                      loc_t, conf_t, idx)
    
    if idx == 0:
        break

2 8732
4 8732
1 8732
2 8732
1 8732
3 8732
4 8732


In [9]:
labels

tensor([14., 14., 14., 14.])

In [10]:
conf_t.size()

torch.Size([7, 8732])

In [11]:
loc_t.size()

torch.Size([7, 8732, 4])

In [12]:
conf_t

tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])

In [13]:
pos = conf_t > 0
print(pos.size())
num_pos = pos.sum(dim=1, keepdim=True)
print(num_pos.size())

torch.Size([7, 8732])
torch.Size([7, 1])


In [14]:
num_pos

tensor([[8],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]])

In [15]:
num_pos.size()

torch.Size([7, 1])

In [16]:
random_data = np.random.randn(batch_size, 8732, 4)
loc_data    = torch.Tensor(random_data)
print(loc_data.size())

pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
print(pos_idx.size())

loc_p = loc_data[pos_idx].view(-1, 4)
loc_t = loc_t[pos_idx].view(-1, 4)

print(loc_p.size())
print(loc_t.size())

torch.Size([7, 8732, 4])
torch.Size([7, 8732, 4])
torch.Size([8, 4])
torch.Size([8, 4])


In [17]:
loc_p


tensor([[ 0.7192, -0.4729,  0.5207, -1.3896],
        [ 1.3222,  0.3499,  0.9224, -0.7630],
        [ 0.6483, -1.5990, -0.7446,  0.0196],
        [ 1.5696, -1.0870, -0.5336,  1.1965],
        [-0.2910,  0.9180, -0.9050, -0.4472],
        [-1.0179,  0.6931, -0.9298, -0.1200],
        [ 2.0300,  0.7415,  0.4269, -0.3017],
        [ 0.2663, -0.2492, -1.2191, -0.5398]])

In [18]:
loc_t

tensor([[-1.9182,  0.7773,  0.9942,  0.2671],
        [-0.0750,  1.1226,  1.2403,  1.1469],
        [ 1.0331, -0.0512, -1.7410, -0.5957],
        [-1.9726, -0.6284,  0.1717,  0.0156],
        [-1.9182, -0.8871,  0.9942,  0.2671],
        [-0.0750, -0.9159,  1.2403,  1.1469],
        [ 0.3964,  1.1028, -0.8961, -1.6233],
        [ 0.3964, -1.1781, -0.8961, -1.6233]])

In [19]:
conf_data = np.random.randint(low= 0, high= 20, size= (batch_size, 8732, 21))
conf_data = torch.Tensor(conf_data)

In [20]:
batch_conf = conf_data.view(-1, 21)
loss_c = log_sum_exp(batch_conf) - torch.gather(batch_conf,1, conf_t.view(-1, 1))

In [24]:
loss_c

tensor([[13.7442],
        [ 7.7956],
        [12.8854],
        ...,
        [ 2.2873],
        [15.1825],
        [14.5848]])

In [26]:
loss_c = loss_c.view(batch_size, -1)

In [27]:
loss_c[pos] = 0

In [28]:
_, loss_idx = loss_c.sort(1, descending=True)

In [32]:
_, idx_rank = loss_idx.sort(1)
print(idx_rank.size())

torch.Size([7, 8732])


In [34]:
num_pos = pos.sum(1 , keepdim = True)

In [36]:
num_neg = torch.clamp(3 * num_pos, max=pos.size(1) - 1)
print(num_neg)

tensor([[24],
        [ 0],
        [ 0],
        [ 0],
        [ 0],
        [ 0],
        [ 0]])


In [38]:
neg = idx_rank < num_neg.expand_as(idx_rank)
print(neg)

tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)


In [39]:
pos_idx = pos.unsqueeze(2).expand_as(conf_data)
neg_idx = neg.unsqueeze(2).expand_as(conf_data)
print(pos_idx.size())
print(neg_idx.size())

torch.Size([7, 8732, 21])
torch.Size([7, 8732, 21])


In [42]:
conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, 21)
print(conf_p.size())

torch.Size([32, 21])


In [43]:
targets_weighted = conf_t[(pos+neg).gt(0)]

In [44]:
N = num_pos.data.sum()

In [None]:
loss_l /= N
loss_c /= N
