In [1]:
import sys

# Root of the YoloV3 project; it must be on sys.path before the later
# `from data...` imports run.
PROJECT_ROOT = "/home/pervinco/DL-workspace/VISION/object_detection/YoloV3"

# Guarded so that re-running this cell does not keep appending the same
# entry to sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
import cv2
import torch
import random
import numpy as np

from data.util import resize_image_and_boxes, draw_boxes, xywh2xyxy

In [3]:
# Class names for the dataset; the list position is presumably the integer
# class id stored in the label files (confirm against draw_boxes).
classes = [
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]

In [4]:
file_list_path = "/home/pervinco/DL-workspace/VISION/object_detection/YoloV3/data/train.txt"


def _label_path_for(image_path):
    """Derive the label-file path from an image path.

    Applies the same textual substitutions as before, in the same order:
    'images' -> 'labels', '.png'/'.jpg' -> '.txt', 'JPEGImages' -> 'labels'.
    """
    return (image_path
            .replace('images', 'labels')
            .replace('.png', '.txt')
            .replace('.jpg', '.txt')
            .replace('JPEGImages', 'labels'))


# The list file holds one image path per line. Strip the line terminators
# and skip blank lines so that an empty string can never be picked as an
# image path further down.
with open(file_list_path, 'r') as f:
    image_files = [line.strip() for line in f if line.strip()]

label_files = [_label_path_for(path) for path in image_files]

In [5]:
# Choose one sample at random; randint is inclusive on both ends, hence
# the "- 1" on the upper bound.
last_index = len(image_files) - 1
idx = random.randint(0, last_index)

# strip() drops any surrounding whitespace (e.g. a trailing newline) from
# the selected entries.
image_path, label_path = (
    entries[idx].strip() for entries in (image_files, label_files)
)

In [6]:
# cv2.imread does not raise on a missing or unreadable file; it returns
# None. Fail here with a clear message instead of a cryptic error inside
# cvtColor.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"cv2.imread could not read image: {image_path}")

# OpenCV loads images as BGR; convert to RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Force a 2-D (N, 5) array even when the label file holds a single row.
annot = np.loadtxt(label_path).reshape(-1, 5)

# Column 0 is the class id; the remaining four columns are the box values
# (presumably normalised centre-x, centre-y, width, height, since they are
# later fed to xywh2xyxy; confirm against the label format).
class_ids = annot[:, 0].reshape(-1, 1).astype(np.int64)
boxes = annot[:, 1:]

print(image.shape)
print(annot)
print(class_ids)
print(boxes)

(375, 500, 3)
[[11.          0.654       0.52666667  0.496       0.94666667]]
[[11]]
[[0.654      0.52666667 0.496      0.94666667]]


In [7]:
# NOTE: this cell overwrites `image` and `boxes`. Re-running it without
# first re-running the loading cell would feed already-resized and
# already-converted data through the same helpers again.
resized_image, resized_boxes = resize_image_and_boxes(image, boxes, new_size=640)
image, boxes = resized_image, resized_boxes

# Convert the boxes with the project helper, using the resized image's
# height and width.
height = image.shape[0]
width = image.shape[1]
boxes = xywh2xyxy(boxes, height, width)
print(boxes)

[[259.84        34.13333333 577.28       640.        ]]


In [8]:
# Visualise the resized image with its converted boxes via the project helper.
# `classes` is passed alongside `class_ids`, presumably to label each box by name
# (draw_boxes is defined in data.util and is not visible here).
draw_boxes(image, boxes, class_ids, classes)

In [9]:
# Put the class-id column in front of the four box columns, giving one
# row of five values per object.
target = np.hstack([class_ids, boxes])
print(target)

[[ 11.         259.84        34.13333333 577.28       640.        ]]


In [10]:
from torch.utils.data import DataLoader
from data.dataset import YoloDataset

train_list = "/home/pervinco/DL-workspace/VISION/object_detection/YoloV3/data/train.txt"
train_dataset = YoloDataset(train_list, img_size=416, augment=True, multiscale=True)

# Look at the first sample only: print the image shape, then the targets
# shape, and stop iterating.
for paths, images, targets in train_dataset:
    for item in (images, targets):
        print(item.shape)
    break

torch.Size([416, 416, 3])
torch.Size([1, 6])


In [11]:
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=train_dataset.collate_fn)

# Inspect a single batch: print its contents, then draw every image of the
# batch with its own boxes.
for paths, images, targets in train_dataloader:
    print(paths)
    print(images.shape)
    print(targets.shape)

    print(targets)

    for idx, image in enumerate(images):
        image = image.numpy()

        # Column 0 of `targets` is compared against the image's position in
        # the batch, so this keeps only the rows belonging to image `idx`.
        annots = targets[targets[:, 0] == idx].numpy()

        # Column 1 holds the class id; columns 2 onwards hold the box values.
        class_ids = annots[:, 1].astype(np.int64)
        boxes = annots[:, 2:]

        # Convert the boxes using this image's height and width.
        boxes = xywh2xyxy(boxes, image.shape[0], image.shape[1])
        print(class_ids)
        print(boxes)

        # `name` is presumably the output file name used by draw_boxes
        # (the helper lives in data.util and is not visible here).
        draw_boxes(image, boxes, class_ids, classes, name=f"sample_batch_{idx}.jpg")

    break

('/home/pervinco/Datasets/PASCAL_VOC/VOCDevkit/VOC2012/JPEGImages/2010_002409.jpg', '/home/pervinco/Datasets/PASCAL_VOC/VOCDevkit/VOC2012/JPEGImages/2008_001645.jpg')
torch.Size([2, 384, 384, 3])
torch.Size([5, 6])
tensor([[ 0.0000,  6.0000,  0.5403,  0.7812,  0.5194,  0.4375],
        [ 0.0000,  6.0000,  0.6097,  0.1635,  0.4194,  0.3229],
        [ 0.0000,  6.0000,  0.4375,  0.5135,  0.6028,  0.6604],
        [ 1.0000, 14.0000,  0.7044,  0.2716,  0.5297,  0.5432],
        [ 1.0000, 12.0000,  0.3617,  0.5375,  0.7234,  0.9250]])
[6 6 6]
[[107.73335   216.        307.2       384.       ]
 [153.6         0.7999935 314.6667    124.799995 ]
 [ 52.266655   70.4       283.73334   324.00003  ]]
[14 12]
[[168.79701    0.       372.2152   208.58102 ]
 [  0.        28.797089 277.7672   384.      ]]


In [None]:
stride = 32
anchors = {'scale1': [(10, 13), (16, 30), (33, 23)],
           'scale2': [(30, 61), (62, 45), (59, 119)],
           'scale3': [(116, 90), (156, 198), (373, 326)]}

# `anchors` is a dict keyed by scale name, so iterating it directly yields
# the key strings rather than (w, h) pairs. Select one scale's anchor list.
# NOTE(review): in the YOLOv3 paper the stride-32 head uses the three
# largest anchors, assumed here to be 'scale3'. Confirm against the model
# configuration.
scale_name = 'scale3'
scale_anchors = anchors[scale_name]
num_anchors = len(scale_anchors)

# Express the anchor sizes in grid-cell units by dividing by the stride.
scaled_anchors = torch.as_tensor([(pw / stride, ph / stride) for pw, ph in scale_anchors], dtype=torch.float)
print(f"scaled anchors : \n{scaled_anchors}")

# Reshape widths and heights to (1, num_anchors, 1, 1).
anchor_w = scaled_anchors[:, 0:1].view((1, num_anchors, 1, 1))
anchor_h = scaled_anchors[:, 1:2].view((1, num_anchors, 1, 1))
print(f"anchor_w : \n{anchor_w}")
print(f"anchor_h : \n{anchor_h}")