In [123]:
import torch
import json
import os
import numpy as np
import random
import cv2
from torch.utils.data import Dataset, DataLoader

In [74]:
with open("test.json", "r") as f:
    paths = json.load(f)

In [130]:
def fromfiles(filename, dataset):
    
    with open(filename, "r") as f:
        data = json.load(f)
    with open(dataset, "r") as f:
        labels = json.load(f)
        
    out = []
    for k in data["objects"]:
        c = labels[data["objects"][k]["name"]]
        tb = data["objects"][k]["bbox"]
        tb = [tb["xmin"], tb["ymin"], tb["xmax"], tb["ymax"]]
        b = [int(tb[0]), int(tb[1]), int(tb[2]), int(tb[3]), int(c)]
        out.append(b)
    return torch.from_numpy(np.array(out))


In [76]:
imgf = list(paths.keys())
tmp = imgf[random.randint(0, len(imgf)-1)]
box = fromfiles(paths[tmp], "./dataset_name.json")
img = cv2.imread(tmp)
img.shape, box.shape

((367, 500, 3), (1, 5))

In [50]:
box

array([[ 29, 102, 280, 351,   6],
       [ 52,  14, 266, 370,   0],
       [199, 121, 250, 204,  11]])

In [139]:
def convert_xyxy_xywh(boxes):
    x1 = boxes[..., 0]
    y1 = boxes[..., 1]
    x2 = boxes[..., 2]
    y2 = boxes[..., 3]
    c = boxes[..., 4]
    
    xc = (x1 + x2)/2
    yc = (y1 + y2)/2
    w = torch.abs(x2 - x1)
    h = torch.abs(y2 - y1)
    
    xc = xc.reshape(-1, 1)
    yc = yc.reshape(-1, 1)
    w = w.reshape(-1, 1)
    h = h.reshape(-1, 1)
    c = c.reshape(-1, 1)
    #print(xc.shape, yc.shape, w.shape, h.shape)
    
    out = torch.cat([xc, yc, w, h, c], dim=-1)
    return out

In [140]:
b = torch.from_numpy(box)
print(b)
b1 = convert_xyxy_xywh(b)
print(b1)

TypeError: expected np.ndarray (got Tensor)

In [141]:
def normalize(boxes, img_shape):
    x = boxes[..., 0]
    y = boxes[..., 1]
    w = boxes[..., 2]
    h = boxes[..., 3]
    c = boxes[..., 4]
    x = x/img_shape[1]
    y = y/img_shape[0]
    w = w/img_shape[1]
    h = h/img_shape[0]
    x = x.reshape(-1, 1)
    y = y.reshape(-1, 1)
    w = w.reshape(-1, 1)
    h = h.reshape(-1, 1)
    c = c.reshape(-1, 1)
    
    out = torch.cat([x, y, w, h, c], dim=-1)
    return out

In [107]:
def iou_w_h(b1, b2):
    inter = torch.min(b1[..., 0], b2[..., 0]) * torch.min(b1[..., 1], b2[..., 1])
    union = b1[..., 0] * b1[..., 1] + b2[..., 0] * b2[..., 1] - inter
    return inter / union

In [91]:
s = torch.from_numpy(np.array(img.shape))
b2 = normalize(b1, s)

In [109]:
stride = [2, 4, 8]
anchors =  torch.from_numpy(np.array([(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)]))

In [115]:
targets = [torch.zeros((3, s1, s1, 6)) for s1 in stride]

In [116]:
targets[0].shape, targets[1].shape, targets[2].shape

(torch.Size([3, 2, 2, 6]), torch.Size([3, 4, 4, 6]), torch.Size([3, 8, 8, 6]))

In [119]:
for box in b2:
    an = iou_w_h(box[2:4], anchors)
    sort_an = an.argsort(descending=True, dim=0)
    for an_index in sort_an:
        

tensor(0.5355, dtype=torch.float64)
tensor(0.4852, dtype=torch.float64)
tensor(0.1809, dtype=torch.float64)
tensor(0.4393, dtype=torch.float64)
tensor(0.4262, dtype=torch.float64)
tensor(0.1484, dtype=torch.float64)
tensor(0.3606, dtype=torch.float64)
tensor(0.1265, dtype=torch.float64)
tensor(0.0329, dtype=torch.float64)


In [206]:
b2

tensor([[ 0.3090,  0.6172,  0.5020,  0.6785,  6.0000],
        [ 0.3180,  0.5232,  0.4280,  0.9700,  0.0000],
        [ 0.4490,  0.4428,  0.1020,  0.2262, 11.0000]])

In [None]:
## read x1, y1, x2, y2, class

## convert to centroid height width

## for a stride add correct labels

## add it for three strides 

## add collate fn

In [194]:
class Yolov3Dataset(Dataset):
    
    def __init__(self, paths, dataset, anchors, strides):
        super(Yolov3Dataset, self).__init__()
        with open(paths, "r") as f:
            self.paths = json.load(f)
        self.imgf = list(self.paths.keys())
        self.dataset = dataset
        self.anchors = torch.from_numpy(np.array(anchors))
        self.strides = strides
        self.num_anchors = self.anchors.shape[0]
        self.anchors_per_stride = self.num_anchors // len(self.strides)
        
    def __len__(self):
        return len(self.imgf)
    
    def __getitem__(self, idx):
        img = cv2.imread(self.imgf[idx])
        boxes = fromfiles(self.paths[self.imgf[idx]], self.dataset)
        boxes = convert_xyxy_xywh(boxes)
        boxes = normalize(boxes, img.shape)
        
        targets = [torch.zeros((self.anchors_per_stride, s, s, 6)) for s in self.strides]
        for box in boxes:
            an = iou_w_h(box[2:4], self.anchors)
            sort_an_idx = an.argsort(descending=True, dim=0)
            x,y,w,h,c = box
            has_an = [False]*self.anchors_per_stride
           
            for an_idx in sort_an_idx:
                scale_idx = an_idx // self.anchors_per_stride
               
                anchors_on_scale = an_idx % self.anchors_per_stride
                curr_stride = self.strides[scale_idx]
                i,j = int(curr_stride * x), int(curr_stride * y)
                
                anchor_val = targets[scale_idx][anchors_on_scale, i, j, 0]
                if not has_an[scale_idx] and not anchor_val:
                    targets[scale_idx][anchors_on_scale, i, j, 0] = 1
                    x, y = curr_stride * x - i, curr_stride * y - j
                    w, h = w * curr_stride, h * curr_stride
                    targets[scale_idx][anchors_on_scale, i, j, 1:5] = torch.tensor([x, y, w, h])
                    targets[scale_idx][anchors_on_scale, i, j, 5] = c
                    has_an[scale_idx] = True
                    
                elif not anchor_val and an[an_idx]:
                    targets[scale_idx][anchors_on_scale, i, j, 0] = -1
                    
                    
        return img, targets
                
        

In [213]:
dataset = Yolov3Dataset("./pascal_paths.json", "./dataset_name.json", anchors, [2,3,4])

In [214]:
anchors = [
    (0.28, 0.22), (0.38, 0.48), (0.9, 0.78),
    (0.07, 0.15), (0.15, 0.11), (0.14, 0.29),
    (0.02, 0.03), (0.04, 0.07), (0.08, 0.06),
] 

In [215]:
img, target = dataset[10]

  scale_idx = an_idx // self.anchors_per_stride


In [216]:
img.shape

(500, 334, 3)

In [217]:
target[0].shape, target[1].shape, target[2].shape

(torch.Size([3, 2, 2, 6]), torch.Size([3, 3, 3, 6]), torch.Size([3, 4, 4, 6]))

In [218]:
target[0]

tensor([[[[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 1.0000,  0.6108,  0.4800,  1.1856,  3.5040,  8.0000]],

         [[-1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]]],


        [[[ 1.0000,  0.2006,  0.7280,  0.3892,  1.4480,  0.0000],
          [ 1.0000,  0.9731,  0.4040,  1.1078,  1.1920,  8.0000]],

         [[ 1.0000,  0.0359,  0.9260,  1.1856,  1.8440,  0.0000],
          [ 1.0000,  0.7515,  0.4840,  0.4970,  1.0320,  8.0000]]],


        [[[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]],

         [[-1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 1.0000,  0.7545,  0.0120,  0.4910,  1.8720,  0.0000]]]])

In [219]:
target[1]

tensor([[[[-1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [-1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [-1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]],

         [[-1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [-1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]],

         [[-1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]]],


        [[[-1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 1.0000,  0.1078,  0.7780,  3.5569,  5.5320,  0.0000]],

         [[-1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [-1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]],

In [220]:
target[2]

tensor([[[[-1.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
            0.0000e+00],
          [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
            0.0000e+00],
          [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
            0.0000e+00],
          [-1.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
            0.0000e+00]],

         [[-1.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
            0.0000e+00],
          [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
            0.0000e+00],
          [-1.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
            0.0000e+00],
          [-1.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
            0.0000e+00]],

         [[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
            0.0000e+00],
          [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           