In [1]:
import os

import numpy as np
import torch
import torch.nn

from common.parser import yaml_parser
from common.recoder import save_checkpoint
# NOTE(review): wildcard import; later cells use `width_height_IOU`, which presumably
# comes from here. Prefer explicit names once the module's exports are confirmed.
from data.yolo_dataset import *
from model.MyYOLOv3 import YOLOv3Loss
from model.darknet2pytorch import DarknetParser


In [2]:
import easydict

# Attribute-style stand-in for command-line arguments: every entry is a path
# relative to the notebook's working directory.
args = easydict.EasyDict({
    "config": "./configs/darknet/yolov4.cfg",            # Darknet network definition
    "weight": "./configs/darknet/yolov4.weights",        # Darknet weights file
    "dataset": "./configs/dataset/yolo_dataset.yml",     # dataset options (YAML)
    "model": "./configs/model/yolo_model.yml",           # model options (YAML)
    "optimizer": "./configs/optimizer/optimizer.yml",    # optimizer options (YAML)
    "weight_save_dir": "./weights"
})


# Parse the three YAML option files, in the same order as before.
dataset_option, model_option, optimizer_option = (
    yaml_parser(option_path)
    for option_path in (args.dataset, args.model, args.optimizer)
)


In [3]:
# Same import as in the first cell; kept so this cell can also be run on its own.
from model.darknet2pytorch import DarknetParser

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Build the network from the Darknet cfg/weights pair, then move it to `device`.
model = DarknetParser(args.config, args.weight)
model = model.to(device)

parse from 'C:/Users/ryyoon/RY_GitHub/Lets_Pytorch/YOLO-v3/configs/darknet/yolov4.cfg'
done

load weights from : 'C:/Users/ryyoon/RY_GitHub/Lets_Pytorch/YOLO-v3/configs/darknet/yolov4.weights'
Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
0 convolutional load weights : [0.004]/[245.779] mb
Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
1 convolutional load weights : [0.075]/[245.779] mb
Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
2 convolutional load weights : [0.092]/[245.779] mb
3 route        load weights : [0.092]/[245.779] mb
Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
4 convolutional load weights : [0.108]/[245.779] mb
Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
5 convolutional load weights : [0.117]/[245.779] mb
Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
6 convolutional load weights : [0.188]/[245.779] mb
7 shortcut     load weigh

In [4]:
# Loss callable; it is invoked later once per scale as
# loss_function(prediction, label_map, scale, anchors).
loss_function = YOLOv3Loss()

# TODO: optimizer setup is disabled. The lines below reference `train_dataset`,
# which is not defined in any of the cells visible here.
# optimizer = torch.optim.Adam(model.parameters(), lr=optimizer_option["OPTIMIZER"]["LR"])
# optimizer_option["OPTIMIZER"]["ITERS_PER_EPOCH"] = len(train_dataset) // optimizer_option["OPTIMIZER"]["BATCH_SIZE"]


In [5]:
from PIL import Image
import torchvision

# Single sample image used for this sanity check.
# NOTE(review): absolute, machine-specific path; consider moving it to a config value.
root = "C:/Users/ryyoon/MA_MSS/ship-tracking/datasets/ship/validation"
path = "daecheon_20201113_0000_011.jpg"

# Normalise to forward slashes so the path reads the same on every OS.
img_path = os.path.join(root, path).replace(os.sep, "/")

# Resize to 608x608 and convert the PIL image to a CHW float tensor.
t = torchvision.transforms.Compose([torchvision.transforms.Resize((608, 608)), torchvision.transforms.ToTensor()])

# Open inside a context manager so the file handle is released, and force RGB so
# grayscale/CMYK/RGBA files still yield exactly 3 channels for the network.
with Image.open(img_path) as pil_img:
    # unsqueeze(0) adds the batch dimension: (3, 608, 608) -> (1, 3, 608, 608).
    img = t(pil_img.convert("RGB")).unsqueeze(0)

In [6]:
# The label file sits next to the image and shares its base name, with a ".txt" extension.
label_path = os.path.join(root, os.path.splitext(path)[0] + ".txt").replace(os.sep, "/")

# Default to an empty (0, 5) array so an empty label file simply yields no rows.
labels = np.zeros((0, 5))

# Context manager guarantees the file is closed (the handle used to be leaked).
with open(label_path, "r") as f:
    # Only parse when the file has content; np.loadtxt warns on empty input.
    if os.fstat(f.fileno()).st_size:
        # reshape keeps a single-row file 2-D: one row of 5 values per object,
        # read downstream as (class id, bx, by, bw, bh).
        labels = np.loadtxt(f, dtype="float").reshape(-1, 5)

In [7]:
# Pull the anchor/scale settings out of the "YOLOv3" section of the model options.
yolo_section = model_option["YOLOv3"]
num_anchors, anchors, scales = (
    yolo_section[key] for key in ("NUM_ANCHORS", "ANCHORS", "SCALES")
)

# Number of class slots that follow the 5 box/objectness entries in each label cell.
class_offset = 80

In [8]:
# Encode the ground-truth rows in `labels` into one target map per scale.
# Each map is (anchors_per_scale, S, S, 5 + class_offset); per cell the code below writes
#   [0:4] box = (tx, ty, bw, bh), [4] state (1 = assigned, -1 = ignored, 0 = empty), [5:] class id.
anchors_per_scale = num_anchors // len(scales)
label_maps = [torch.zeros((anchors_per_scale, scale, scale, 5 + class_offset)) for scale in scales]

# Loop-invariant, so built once: one (w, h) row per anchor.
anchors_wh = torch.tensor(anchors).reshape(-1, 2)         ## (3, 3, 2) -> (9, 2)

for label in labels:
    obj_ids = label[0]
    # Unpacking yields scalar copies, so nothing below writes back into `labels`
    # (the previous in-place update of the box slice made this cell non-idempotent).
    bx, by, bw, bh = label[1:5]

    # Rank every anchor by how well its shape matches this box (best first).
    gtBBOX_wh = torch.tensor(label[3:5])
    wh_IOUs = width_height_IOU(anchors_wh, gtBBOX_wh)
    anchor_indices = wh_IOUs.argsort(descending=True, dim=0)

    # At most one positive anchor per scale for each object.
    is_scale_occupied = [False] * len(scales)

    for anchor_index in anchor_indices:
        anchor_index = int(anchor_index)

        # Flat anchor index -> (scale, anchor slot within that scale).
        scale_idx = anchor_index // anchors_per_scale
        anch_idx_in_scale = anchor_index % anchors_per_scale

        scale = scales[scale_idx]
        # Grid cell containing the box centre; clamp so a coordinate of exactly 1.0
        # lands in the last cell instead of indexing out of range.
        cx = min(int(bx * scale), scale - 1)
        cy = min(int(by * scale), scale - 1)
        # Offset of the centre relative to that cell.
        gt_tx = bx * scale - cx
        gt_ty = by * scale - cy

        # Non-zero state means the slot is already assigned (1) or ignored (-1).
        is_cell_occupied = bool(label_maps[scale_idx][anch_idx_in_scale, cy, cx, 4] != 0)

        if not is_cell_occupied and not is_scale_occupied[scale_idx]:       ## free slot and best remaining anchor on this scale
            label_maps[scale_idx][anch_idx_in_scale, cy, cx,  4] = 1
            label_maps[scale_idx][anch_idx_in_scale, cy, cx, :4] = torch.tensor(
                [gt_tx, gt_ty, bw, bh], dtype=torch.float32
            )
            # NOTE(review): the class id is broadcast into every class slot, as before.
            # The original also built an unused one-hot vector; confirm which encoding
            # YOLOv3Loss expects.
            label_maps[scale_idx][anch_idx_in_scale, cy, cx, 5:] = float(obj_ids)
            is_scale_occupied[scale_idx] = True                             ## the best-fitted anchor has been picked in this scale

        elif not is_cell_occupied and wh_IOUs[anchor_index] > 0.5:
            # Good shape match but not the best: mark as ignored. Only empty slots are
            # marked, so a positive target written for an earlier object is never overwritten.
            label_maps[scale_idx][anch_idx_in_scale, cy, cx,  4] = -1


In [9]:
# Switch the network to training mode before running the forward pass.
model.train()

DarknetParser(
  (module_list): ModuleList(
    (0): Sequential(
      (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
      (mish_0): Mish()
    )
    (1): Sequential(
      (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
      (mish_1): Mish()
    )
    (2): Sequential(
      (conv_2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (batch_norm_2): BatchNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
      (mish_2): Mish()
    )
    (3): Sequential(
      (route_3): Identity()
    )
    (4): Sequential(
      (conv_4): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (batch_norm_4): BatchNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track

In [10]:
# Rebind `scales` and `anchors` as tensors on `device` for the loss calls.
# NOTE: this shadows the plain-list versions the label-encoding cell was run with.
yolo_options = model_option["YOLOv3"]
scales = torch.tensor(yolo_options["SCALES"]).to(device=device)       ## grid sizes; [19, 38, 76] judging by the label-map shapes printed in this notebook
anchors = torch.tensor(yolo_options["ANCHORS"]).to(device=device)

In [11]:
# Move the image batch to the compute device.
b_img = img.to(device)

# Give every per-scale label map a leading batch dimension of 1 and move it to the device.
b_label = [label_map.to(device).unsqueeze(0) for label_map in label_maps]

In [12]:
# Forward pass on the single-image batch; `pred` is indexed per output head (pred[0..2]) in later cells.
pred = model(b_img)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [13]:
# Print each head output's shape next to the label map at the same list index.
# NOTE: per the printed shapes, the two lists run in opposite scale order.
for head_idx in range(3):
    print(pred[head_idx].shape, b_label[head_idx].shape)

torch.Size([1, 255, 76, 76]) torch.Size([1, 3, 19, 19, 85])
torch.Size([1, 255, 38, 38]) torch.Size([1, 3, 38, 38, 85])
torch.Size([1, 255, 19, 19]) torch.Size([1, 3, 76, 76, 85])


In [14]:
# One loss term per scale. `pred` and `b_label` run in opposite scale order
# (see the shapes printed by the previous cell), hence the reversed `pred` index.
per_scale_losses = [
    loss_function(pred[2 - level], b_label[level], scales[level], anchors[level])
    for level in range(3)
]

# Average the three per-scale losses into a single scalar.
loss = (per_scale_losses[0] + per_scale_losses[1] + per_scale_losses[2]) / 3

print(loss.item())

0.6919001936912537
