In [1]:
import sys

# Root of the YoloV3 project checkout; adding it to sys.path lets the notebook import
# the project's `data` package.
PROJECT_ROOT = "/home/pervinco/DL-workspace/VISION/object_detection/YoloV3"

# Guard the append so that re-running this cell does not pile up duplicate sys.path entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
import cv2
import torch
import random
import numpy as np

from data.util import resize_image_and_boxes, draw_boxes, xywh2xyxy

In [3]:
# The 20 object category names, in alphabetical order.
# Order matters: the integer class ids read from the label files are handed to draw_boxes
# together with this list, presumably as indices into it.
classes = [
    "aeroplane", "bicycle", "bird", "boat", "bottle",          # indices 0-4
    "bus", "car", "cat", "chair", "cow",                       # indices 5-9
    "diningtable", "dog", "horse", "motorbike", "person",      # indices 10-14
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",      # indices 15-19
]

In [4]:
file_list_path = "/home/pervinco/DL-workspace/VISION/object_detection/YoloV3/data/train.txt"

# The list file holds one image path per line. Strip the trailing newline up front and
# drop blank lines so every entry is a usable path; a stray empty line would otherwise be
# a valid random pick and only fail later, at image load time.
with open(file_list_path, 'r') as f:
    stripped_lines = (line.strip() for line in f)
    image_files = [path for path in stripped_lines if path]

# Derive each label path from its image path by plain substring substitution: the image
# directory name ('images' or 'JPEGImages') becomes 'labels' and the image extension
# ('.png' or '.jpg') becomes '.txt'.
# NOTE(review): str.replace rewrites every occurrence in the path, not only the directory
# and extension components, so those substrings must not appear elsewhere in the path.
label_files = [
    path.replace('images', 'labels')
        .replace('.png', '.txt')
        .replace('.jpg', '.txt')
        .replace('JPEGImages', 'labels')
    for path in image_files
]

In [5]:
# Draw one index uniformly at random; the same index addresses the image list and the
# label list, which are parallel.
idx = random.randrange(len(image_files))

# Remove surrounding whitespace (such as a trailing newline) from both paths.
image_path, label_path = image_files[idx].strip(), label_files[idx].strip()

In [6]:
# cv2.imread does not raise on a missing or unreadable file: it returns None. Fail here
# with the offending path instead of with an opaque error inside cvtColor.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # swap channel order from BGR to RGB

# reshape(-1, 5) forces a 2-D (N, 5) array even when the label file has a single row.
annot = np.loadtxt(label_path).reshape(-1, 5)

# Column 0 holds the class id; kept as an (N, 1) integer column.
class_ids = annot[:, 0].reshape(-1, 1).astype(np.int64)
# The remaining four columns are the box values (later passed to xywh2xyxy).
boxes = annot[:, 1:]

print(image.shape)
print(annot)
print(class_ids)
print(boxes)

(500, 333, 3)
[[14.          0.48798799  0.546       0.51951952  0.456     ]]
[[14]]
[[0.48798799 0.546      0.51951952 0.456     ]]


In [7]:
# Resize the image together with its boxes (requested size: 640), then convert the boxes
# with xywh2xyxy using the resized image's height and width.
# NOTE: this cell overwrites `image` and `boxes`, so it is not idempotent; re-run the
# cell that loads them before running this one again.
image, boxes = resize_image_and_boxes(image, boxes, new_size=640)

height, width = image.shape[0], image.shape[1]
boxes = xywh2xyxy(boxes, height, width)
print(boxes)

[[146.06606607 203.52       478.55855856 495.36      ]]


In [8]:
# Pass the resized image, the converted boxes, their class ids and the class-name list to
# the project's draw_boxes helper.
# NOTE(review): draw_boxes presumably renders the boxes and labels onto the image; whether
# it displays or saves the result is not visible from this notebook — confirm in data.util.
draw_boxes(image, boxes, class_ids, classes)

In [9]:
# Build one target row per object: the class-id column first, then the box columns.
# Joining the int64 ids with the floating-point boxes promotes the whole array to float.
target = np.concatenate([class_ids, boxes], axis=1)
print(target)

[[ 14.         146.06606607 203.52       478.55855856 495.36      ]]


In [10]:
from data.dataset import YoloDataset
from torch.utils.data import DataLoader

# Reuse the list-file path defined earlier in the notebook instead of repeating the
# literal, so the dataset always reads the same list as the manual walkthrough above.
train_dataset = YoloDataset(file_list_path,
                            img_size=416,
                            augment=True,
                            multiscale=True)

# Fetch only the first sample to sanity-check what the dataset yields.
paths, images, targets = next(iter(train_dataset))
print(images.shape)
print(targets.shape)

torch.Size([333, 500, 3])
torch.Size([1, 6])


In [11]:
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=train_dataset.collate_fn)

# Inspect a single batch only.
paths, images, targets = next(iter(train_dataloader))
print(paths)
print(images.shape)
print(targets.shape)
print(targets)

# First two columns of `targets`: the index of the image (within the batch) that contains
# the object, and the object's class label.
b, target_labels = targets[:, :2].long().t()
print(b)
print(target_labels)

# Loop-local names are kept distinct from `idx`, `image`, `class_ids` and `boxes` used by
# the earlier single-sample cells, so running this cell does not silently overwrite them.
for batch_idx, batch_image in enumerate(images):
    # Channel-first tensor -> channel-last uint8 array. copy() returns a C-contiguous
    # array, which is presumably what draw_boxes needs to draw on.
    # NOTE(review): astype(np.uint8) truncates; if the dataset yields floats scaled to
    # [0, 1] the image would come out black — confirm the value range.
    canvas = np.transpose(batch_image.numpy(), axes=(1, 2, 0)).astype(np.uint8).copy()
    img_height, img_width = canvas.shape[:2]

    # Keep only the target rows that belong to this image of the batch.
    annots = targets[targets[:, 0] == batch_idx].numpy()
    batch_class_ids = annots[:, 1].astype(np.int64)
    batch_boxes = xywh2xyxy(annots[:, 2:], img_height, img_width)
    print(batch_class_ids)
    print(batch_boxes)

    draw_boxes(canvas, batch_boxes, batch_class_ids, classes, name=f"sample_batch_{batch_idx}.jpg")

('/home/pervinco/Datasets/PASCAL_VOC/VOCDevkit/VOC2012/JPEGImages/2010_003604.jpg', '/home/pervinco/Datasets/PASCAL_VOC/VOCDevkit/VOC2012/JPEGImages/2009_004869.jpg')
torch.Size([2, 3, 608, 608])
torch.Size([4, 6])
tensor([[ 0.0000,  3.0000,  0.4013,  0.4990,  0.8027,  0.9940],
        [ 1.0000, 15.0000,  0.5202,  0.6548,  0.1952,  0.4006],
        [ 1.0000, 14.0000,  0.8932,  0.4877,  0.1317,  0.2185],
        [ 1.0000,  8.0000,  0.4645,  0.5000,  0.8091,  1.0000]])
tensor([0, 1, 1, 1])
tensor([ 3, 15, 14,  8])
[3]
[[  0.          1.2160025 488.02133   605.56805  ]]
[15 14  8]
[[256.89926  276.33514  375.6074   519.89215 ]
 [503.0439   230.0735   583.1093   362.9337  ]
 [ 36.426094   0.       528.36646  608.      ]]
