In [1]:
import os

from common.parser import yaml_parser
from common.recoder import save_checkpoint
from data.yolo_dataset import *
from model.loss import YOLOv3Loss
from model.model import Darknet4YOLOv3

import torch
import torch.nn


In [2]:
import easydict

args = easydict.EasyDict({
    "config": "C:/Users/ryyoon/RY_GitHub/YOLO-v3/configs/model/yolov3.cfg",
    # "weight": "C:/Users/ryyoon/RY_GitHub/YOLO-v3/configs/darknet/yolov4.weights",
    "dataset": "C:/Users/ryyoon/RY_GitHub/YOLO-v3/configs/dataset/yolo_dataset.yml",
    "model": "C:/Users/ryyoon/RY_GitHub/YOLO-v3/configs/model/yolo_model.yml",
    "optimizer": "C:/Users/ryyoon/RY_GitHub/YOLO-v3/configs/optimizer/optimizer.yml",
    "weight_save_dir": "C:/Users/ryyoon/RY_GitHub/YOLO-v3/weights"
})


dataset_option = yaml_parser(args.dataset)
model_option = yaml_parser(args.model)
optimizer_option = yaml_parser(args.optimizer)

In [3]:
# device = torch.device('cpu')
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

###########################
## BUILD MODEL & LOSS_fn ##
###########################
# model = DarknetParser(args.config, args.weight)
model = Darknet4YOLOv3(args.config).to(device)

In [4]:
loss_function = YOLOv3Loss()

# optimizer = torch.optim.Adam(model.parameters(), lr=optimizer_option["OPTIMIZER"]["LR"])
# optimizer_option["OPTIMIZER"]["ITERS_PER_EPOCH"] = len(train_dataset) // optimizer_option["OPTIMIZER"]["BATCH_SIZE"]


In [5]:
from PIL import Image

root = "C:/Users/ryyoon/MA_MSS/ship-tracking/datasets/ship/validation"
path = "daecheon_20201113_0000_011.jpg"

img_path = os.path.join(root, path).replace(os.sep, "/")

img = Image.open(img_path)

import torchvision

t = torchvision.transforms.Compose([torchvision.transforms.Resize((608, 608)), torchvision.transforms.ToTensor()])

img = t(img).unsqueeze(0)

In [6]:
label_path = os.path.join(root, os.path.splitext(path)[0] + ".txt").replace(os.sep, "/")
f = open(label_path, "r")
labels = np.zeros((0, 5))
if os.fstat(f.fileno()).st_size:
    labels = np.loadtxt(f, dtype="float")
    labels = labels.reshape(-1, 5)

In [7]:
num_anchors = model_option["YOLOv3"]["NUM_ANCHORS"]
anchors = model_option["YOLOv3"]["ANCHORS"]
scales = model_option["YOLOv3"]["SCALES"]
class_offset = 80

In [8]:
label_maps = [torch.zeros((num_anchors // 3, scale, scale, 5 + class_offset), dtype=float) for scale in scales]
for label in labels:
    obj_ids, gtBBOX = label[0], label[1:5]
    bx, by, bw, bh = gtBBOX
    
    obj_vec = [0] * class_offset
    obj_vec[int(obj_ids)] = 1

    anchors_wh = torch.tensor(anchors).reshape(-1, 2)         ## (3, 3, 2) -> (9, 2)
    gtBBOX_wh = torch.tensor(gtBBOX[2:4])
    wh_IOUs = width_height_IOU(anchors_wh, gtBBOX_wh)

    anchor_indices = wh_IOUs.argsort(descending=True, dim=0)

    is_scale_occupied = [False] * 3

    for anchor_index in anchor_indices:

        scale_idx = torch.div(anchor_index, len(scales), rounding_mode='floor')
        anch_idx_in_scale = anchor_index % len(scales)

        scale = scales[scale_idx]
        cx = int(bx * scale)          ## .....??
        cy = int(by * scale)
        gt_tx = bx * scale - cx
        gt_ty = by * scale - cy
        gtBBOX[0:2] = gt_tx, gt_ty

        is_cell_occupied = label_maps[scale_idx][anch_idx_in_scale, cy, cx,  4]

        if not is_cell_occupied and not is_scale_occupied[scale_idx]:       ## if there is no other overlapping-liked bbox and I'm the best
            label_maps[scale_idx][anch_idx_in_scale, cy, cx, 4:5] = 1
            label_maps[scale_idx][anch_idx_in_scale, cy, cx, 0:4] = torch.tensor(gtBBOX, dtype=float)
            label_maps[scale_idx][anch_idx_in_scale, cy, cx, 5:] = torch.tensor(obj_ids)
            is_scale_occupied[scale_idx] = True                             ## the best-fitted anchor has been picked in this scale
        
        elif wh_IOUs[anchor_index] > 0.5:
            label_maps[scale_idx][anch_idx_in_scale, cy, cx,  4] = -1        ## this anchor is not the best, so we will ignore it


In [9]:
model.train()

Darknet4YOLOv3(
  (module_list): ModuleList(
    (0): Sequential(
      (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
      (leaky_0): LeakyReLU(negative_slope=0.1)
    )
    (1): Sequential(
      (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
      (leaky_1): LeakyReLU(negative_slope=0.1)
    )
    (2): Sequential(
      (conv_2): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (batch_norm_2): BatchNorm2d(32, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
      (leaky_2): LeakyReLU(negative_slope=0.1)
    )
    (3): Sequential(
      (conv_3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm_3): BatchNorm2d(64, eps=1e-05

In [10]:
scales = torch.tensor(model_option["YOLOv3"]["SCALES"]).to(device)       ## [13, 26, 52]
anchors = torch.tensor(model_option["YOLOv3"]["ANCHORS"]).to(device)

In [11]:
b_img = img.to(device)
b_label = [label.to(device).unsqueeze(0) for label in label_maps]

In [12]:
pred = model(b_img)

In [13]:
print(pred[0].shape, b_label[0].shape)
print(pred[1].shape, b_label[1].shape)
print(pred[2].shape, b_label[2].shape)

torch.Size([1, 3, 19, 19, 85]) torch.Size([1, 3, 19, 19, 85])
torch.Size([1, 3, 38, 38, 85]) torch.Size([1, 3, 38, 38, 85])
torch.Size([1, 3, 76, 76, 85]) torch.Size([1, 3, 76, 76, 85])


In [14]:
loss = ( loss_function(pred[0], b_label[0], scales[0], anchors[0])
        + loss_function(pred[1], b_label[1], scales[1], anchors[1])
        + loss_function(pred[2], b_label[2], scales[2], anchors[2]) )

loss /= 3

print(loss.item())

58378.5871441855


In [15]:
model.eval()
# pred = model(b_img)
# print(f"boxes: {pred[0].shape}")
# print(f"confs: {pred[1].shape}")

Darknet4YOLOv3(
  (module_list): ModuleList(
    (0): Sequential(
      (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
      (leaky_0): LeakyReLU(negative_slope=0.1)
    )
    (1): Sequential(
      (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
      (leaky_1): LeakyReLU(negative_slope=0.1)
    )
    (2): Sequential(
      (conv_2): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (batch_norm_2): BatchNorm2d(32, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
      (leaky_2): LeakyReLU(negative_slope=0.1)
    )
    (3): Sequential(
      (conv_3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm_3): BatchNorm2d(64, eps=1e-05

In [16]:
pred = model(b_img)
print(pred[0].shape)

torch.Size([1083, 6])


In [17]:
def coord_IOU(box_1, box_2, opt="center"):
    if opt == "center":
        ## center to corner
        box_1[..., 0:2] = torch.cat((box_1[..., 0:1] - box_1[..., 2:3] / 2, box_1[..., 1:2] - box_1[..., 3:4] / 2), dim=-1)
        box_2[..., 0:2] = torch.cat((box_2[..., 0:1] - box_2[..., 2:3] / 2, box_2[..., 1:2] - box_2[..., 3:4] / 2), dim=-1)

    x_left   = torch.max(box_1[..., 0:1], box_2[..., 0:1])
    y_bottom = torch.max(box_1[..., 1:2], box_2[..., 1:2])
    x_right  = torch.min(box_1[..., 0:1] + box_1[..., 2:3], box_2[..., 0:1] + box_1[..., 2:3])
    y_top    = torch.min(box_1[..., 1:2] + box_1[..., 3:4], box_2[..., 1:2] + box_1[..., 3:4])
        
    inter_w = (x_right - x_left).clamp(0)
    inter_h = (y_top - y_bottom).clamp(0)

    box_1_area = box_1[..., 2:3] * box_1[..., 3:4]
    box_2_area = box_2[..., 2:3] * box_2[..., 3:4]

    i = inter_w * inter_h
    u = box_1_area + box_2_area - i
    
    iou = i / (u + 1e-16)
    return iou

In [21]:
def NMS(pred, batch_size):
    surpressed_pred = []

    for batch_idx in range(batch_size):
        each_pred = [pred[0][batch_idx], pred[1][batch_idx], pred[2][batch_idx]]        ## [tensor(255, 76, 76), tensor(255, 38, 38), tensor(255, 19, 19)]

        # for i in range(3):
        #     each_pred[i] = each_pred[i].reshape(-1, each_pred[i].shape[-1])            
        each_pred = torch.cat(each_pred, dim=0)        ## pred = tensor(22743, 6)
        
        is_object = each_pred[..., 4] > 0.15      ## 0.15: nominal probability threshold --- https://nrsyed.com/2020/04/28/a-pytorch-implementation-of-yolov3-for-real-time-object-detection-part-1/#:~:text=By%20filtering%20out%20detections%20below%20some%20nominal%20probability%20threshold%20(e.g.%2C%200.15)%2C%20we%20eliminate%20most%20of%20the%20false%20positives.
        candis = each_pred[is_object]     ## get the only row vectors that are not the false positive

        candis_indices = torch.argsort(candis[..., 4], descending=True)
        candis = candis[candis_indices]

        elected_preds = []
        ## until there is no more duplicated bboxes
        while candis.shape[0] > 0:
            candi  = candis[0:1]
            others = candis[1: ]
        
            reshaped_candi = candi.repeat(others.shape[0], 1)
            coord_IOUs = coord_IOU(reshaped_candi, others)
        
            non_duplicated = coord_IOUs[..., 0] < 0.5
            candis = others[non_duplicated]
            elected_preds.append(candi)
            # print(f"# of candis: {candis.shape[0]}")
                        
        if len(elected_preds) != 0:
            elected_preds = torch.cat(tuple(elected_preds), dim=0)
        else:
            elected_preds = torch.tensor(elected_preds)
        surpressed_pred.append(elected_preds)

    return surpressed_pred

In [22]:
surpressed_pred = NMS(pred, batch_size=1)

In [23]:
print(len(label))
print(surpressed_pred[0][..., 4])

5
tensor([0.4937], device='cuda:0', grad_fn=<SelectBackward>)


In [27]:
print(label)
print(surpressed_pred[0][:, :4])

[0.         0.43229148 0.72314858 0.025      0.03287037]
tensor([[  0.4801,   0.4998, 144.3945, 112.8900]], device='cuda:0',
       grad_fn=<SliceBackward>)
