In [217]:
import os
import cv2
import numpy as np

import torch
from torch.utils.data import DataLoader

# from data.yolo_dataset import YoloDataset, collate_fn

In [230]:
from torch import nn


class YOLOv3Loss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss(reduction='none')
        self.bce = nn.BCEWithLogitsLoss(reduction='none')
        self.multiMargin = nn.MultiLabelSoftMarginLoss(reduction='none')        ## https://cvml.tistory.com/26
                                                                

        
    def forward(self, pred, target, scale, anchors):

        pred = pred.reshape(-1, 3, 85, scale, scale)
        pred = pred.permute(0, 1, 3, 4, 2)
        print(pred.shape)
        print(target.shape)

        ## no_obj_loss(No Object Loss):     Loss for objectness score      of non-object-assigned BBOXes
        ## is_obj_loss(Object Loss):        Loss for objectness score      of     object-assigned BBOXes
        ## coord_loss(Coordinates Loss):    Loss for predicted coordinates of     object-assigned BBOXes
        ## class_loss(Classification Loss): Loss for predicted class-ids   of     object-assigned BBOXes 

        is_assigned = pred[..., 4] == 1     ## tensor([(element == 1) for element in 4th column of pred])   ## e.g. tensor([True, False, False, ...])
        no_assigned = pred[..., 4] == 0     ## If use these boolean-list tensor as a indices,
                                            ##    we can extract the only rows from target(label) tensor -- whose 4th column element(objectness score) is 1-or-0

        print(pred[..., 4][no_assigned].shape)
        print(target[..., 4][no_assigned.shape])

        no_obj_loss = self.get_loss(pred[...,  4][no_assigned], target[...,  4][no_assigned], anchors, opt="NO_OBJ")
        is_obj_loss = self.get_loss(pred[...,  4][is_assigned], target[...,  4][is_assigned], anchors, opt="IS_OBJ")
        coord_loss =  self.get_loss(pred[..., :4][is_assigned], target[..., :4][is_assigned], anchors, opt="COORD")
        class_loss =  self.get_loss(pred[..., 5:][is_assigned], target[..., 5:][is_assigned], anchors, opt="CLASS")
        
        loss = no_obj_loss + is_obj_loss + coord_loss + class_loss
        return loss


    def get_loss(self, pred, target, anchors, opt):
        
        if opt == "NO_OBJ":
            loss = self.bce(pred, target)
            return loss

        elif opt == "IS_OBJ":
            loss = self.bce(torch.sigmoid(pred), target)            ## If use [wh_IOU * target] instead of [target], MSE loss is better . . . maybe.
            return loss                                             ##    cause [target] and [wh_IOU * target] values differ in "Discrete"/"Continuous"

        elif opt == "COORD":
            pred_bboxes =   torch.cat([torch.sigmoid(pred[..., 0:2]), torch.exp(pred[..., 2:4])          ], dim=1)
            target_bboxes = torch.cat([              pred[..., 0:2] ,          (pred[..., 2:4] / anchors)], dim=1)
            loss = self.mse(pred_bboxes, target_bboxes)
            return loss

        elif opt == "CLASS":
            loss = self.multiMargin(pred, target)
            return loss

In [219]:
from common.sampler import sampler
from common.utils import *

from torch.utils.data.dataset import Dataset

from PIL import Image
import torchvision

class YoloDataset(Dataset):
    def __init__(self,
                 dataset_option,
                 model_option,
                 split="train"):

        self.dataset_option = dataset_option
        self.model_option = model_option
        self.classes = self.dataset_option["DATASET"]["CLASSES"]

        
        dataset_name = dataset_option["DATASET"]["NAME"]

        assert split == "train" or split == "valid"
        
        assert dataset_name in ["ship", "yolo-dataset"]
                
        if dataset_name == "yolo-dataset" or dataset_name == "ship":
            if split == "train":
                dataset_type = "train"
            elif split == "valid":
                dataset_type = "valid"

        root = self.dataset_option["DATASET"]["ROOT"]
        self.split = split
        
        self.dataset = self.load_dataset(os.path.join(root, dataset_type))


    def __getitem__(self, idx):
        img_path, label_path = self.dataset[idx]

        ## load img
        img_file = Image.open(img_path)
        t = torchvision.transforms.Compose([torchvision.transforms.Resize((608, 608)), torchvision.transforms.ToTensor()])

        img_file = t(img_file)

        ## load label
        label_f = open(label_path, "r")

        labels = np.zeros((0, 5))
        if os.fstat(label_f.fileno()).st_size:
            labels = np.loadtxt(label_f, dtype="float")
            labels = labels.reshape(-1, 5)

        #############################################################################################
        ## transform "labels" - from np.shape(_, 5) to tensor(#ofS=3, A=3, S, S, 5 + class_offset) ##
        ## i.e., (1) add objectness score, (2) apply one-hot encoding to object ids                ##
        #############################################################################################
        num_anchors = self.model_option["YOLOv3"]["NUM_ANCHORS"]
        anchors = self.model_option["YOLOv3"]["ANCHORS"]
        scales = self.model_option["YOLOv3"]["SCALES"]
        class_offset = 80
        # class_offset = self.dataset_option["DATASET"]["NUM_CLASSES"]

        ##           tensor([# of S]=3,  [# of A]=3,     S,     S, 5 + class_offset)
        label_maps = [torch.zeros((num_anchors // 3, scale, scale, 5 + class_offset)) for scale in scales]
        for label in labels:
            obj_ids, gtBBOX = label[0], label[1:5]
            bx, by, bw, bh = gtBBOX
            
            ## (2) Create one-hot vector with list of object ids
            obj_vec = [0] * class_offset
            obj_vec[int(obj_ids)] = 1
            # for obj_id in obj_ids:
            #     obj_vec[int(obj_id)] = 1

            ## (1) Set objectness score
            ## . . . . before then, we should find (the correct cell_offset(Si: cy, Sj: cx) & the best-fitted anchor(Ai: pw, ph))
            ## . . . .                          -- where g.t. bbox(from label) be assigned
            ## . . . . => label_maps[idx of Scale: anchor assigned][idx of Anchor, Si, Sj, 4] = 1 ----- case of Best
            ## . . . . => label_maps[idx of Scale: anchor assigned][idx of Anchor, Si, Sj, 4] = -1 ---- case of Non-best (to be ignored)
            ## . . . . => DEFAULT = 0 ----------------------------------------------------------------- case of No-assigned
            ## 
            ## . . (1-1) How evaluate the "goodness" of anchor box
            ## . . . . .     is to compare "Similarity" between the anchor box and g.t. BBOX
            ## . . . . . => Calculate "width-and-height-based IOU" between anchBOX and gtBBOX
            ## . . . . . => Pick the anchBOX in descending order with whIOU value
            anchors_wh = torch.tensor(anchors).to(device='cpu').reshape(-1, 2)         ## (3, 3, 2) -> (9, 2)
            gtBBOX_wh = torch.tensor(gtBBOX[2:4]).to(device='cpu')
            wh_IOUs = width_height_IOU(anchors_wh, gtBBOX_wh)

            anchor_indices = wh_IOUs.argsort(descending=True, dim=0)

            ## Flag list for checking whether other anchor has been already picked in the scale
            is_scale_occupied = [False] * 3

            for anchor_index in anchor_indices:

                ## To mark the anchor
                ## . . (1) Get information of the anchor BBOX
                scale_idx = torch.div(anchor_index, len(scales), rounding_mode='floor')
                anch_idx_in_scale = anchor_index % len(scales)

                ## . . (2) then, Get cell information(Si: cy, Sj: cx) of the g.t.BBOX
                scale = scales[scale_idx]
                cx = int(bx * scale)          ## .....??
                cy = int(by * scale)
                gt_tx = bx * scale - cx
                gt_ty = by * scale - cy
                gtBBOX[0:2] = gt_tx, gt_ty

                ## Get record of the cell information in the scale
                ## . . to avoid overlapping bboxes
                is_cell_occupied = label_maps[scale_idx][anch_idx_in_scale, cy, cx,  4]

                if not is_cell_occupied and not is_scale_occupied[scale_idx]:       ## if there is no other overlapping-liked bbox and I'm the best
                    label_maps[scale_idx][anch_idx_in_scale, cy, cx,  4] = 1
                    label_maps[scale_idx][anch_idx_in_scale, cy, cx, :4] = torch.tensor(gtBBOX)
                    label_maps[scale_idx][anch_idx_in_scale, cy, cx, 5:] = torch.tensor(obj_ids)
                    is_scale_occupied[scale_idx] = True                             ## the best-fitted anchor has been picked in this scale
                
                elif wh_IOUs[anchor_index] > 0.5:
                    label_maps[scale_idx][anch_idx_in_scale, cy, cx,  4] = -1        ## this anchor is not the best, so we will ignore it

        return img_file, label_maps, img_path


    def __len__(self):
        return len(self.dataset)


    # def load_dataset(self, f_list_path):
    def load_dataset(self, dataset_path):
        image_set = []

        for r, _, f in os.walk(dataset_path):
            for file in f:
                if file.lower().endswith((".png", ".jpg", ".bmp", ".jpeg")):
                    # set paths - both image and label file
                    img_path = os.path.join(r, file).replace(os.sep, '/')
                    label_path = os.path.splitext(img_path)[0] + ".txt"

                    if not os.path.isfile(img_path) or not os.path.isfile(label_path):
                        continue
                                
                    image_set.append((img_path, label_path))
                
        return image_set
    

def collate_fn(batch):
        img_files = []
        labels_1 = []
        labels_2 = []
        labels_3 = []
        img_paths = []

        for b in batch:
            img_files.append(b[0])

            labels_1.append(b[1][0])
            labels_2.append(b[1][1])
            labels_3.append(b[1][2])

            img_paths.append(b[2])

        img_files = torch.stack(img_files, 0)

        labels_1 = torch.stack(labels_1, 0)
        labels_2 = torch.stack(labels_2, 0)
        labels_3 = torch.stack(labels_3, 0)
        img_labels = (labels_1, labels_2, labels_3)

        return img_files, img_labels, img_paths

In [220]:
def valid(
    model,
    valid_loader,
    model_option,
    epoch,
    # anchors,
    ):
    model.eval()
    true_pred_num = 0
    gt_num = 0

    for i, batch_img, batch_label, batch_img_path in enumerate(valid_loader, 0):

        pred = model(batch_img)

        ## Post-Processing?

        ## Get the number of both true predictions and ground truth


    ## Examine Accuracy
    acc = (true_pred_num / gt_num + 1e-16) * 100
    
    return acc

In [221]:
dataset_option = {  "DATASET": {
                        "NAME": "ship",
                        "ROOT": "C:/Users/ryyoon/MA_MSS/ship-tracking/datasets/ship",
                        "CLASSES": {
                            #    "선박": 0, "부표": 1, "어망부표": 2,
                            #    "해상풍력": 3, "등대": 4, "기타부유물" : 5
                               "선박": 0, "부표": 1, "어망부표": 1,
                               "해상풍력": 1, "등대": 1, "기타부유물" : 1
                        },
                        "NUM_CLASSES": 2
                     }
                 }

model_option = {"YOLOv3": {
                     "SCALES": [608 // 32, 608 // 16, 608 // 8],
                     "NUM_ANCHORS": 9,
                     "ANCHORS": [[( 12,  16), ( 19,  36), ( 40,  28)],
                                 [( 36,  75), ( 76,  55), ( 72, 146)],
                                 [(142, 110), (192, 243), (459, 401)]],
                    #  "ANCHORS": [[( 10, 13), ( 16,  30), ( 33,  23)],
                    #              [( 30, 61), ( 62,  45), ( 59, 119)],
                    #              [(116, 90), (156, 198), (373, 326)]]
                    }
               }

optim_option = {"OPTIMIZER": {
                     "METHOD": "adam",
                     "BATCH_SIZE": 4,
                     "EPOCHS": 1,
                     "LR": 1e-4,
                    }
               }

In [222]:
epochs = optim_option["OPTIMIZER"]["EPOCHS"]
batch_size = optim_option["OPTIMIZER"]["BATCH_SIZE"]

In [223]:
from darknet2pytorch import DarknetParser

cfg = './weights/darknet/yolov4.cfg'
weight = './weights/darknet/yolov4.weights'
model = DarknetParser(cfg, weight)

parse from './weights/darknet/yolov4.cfg'
done

load weights from : './weights/darknet/yolov4.weights'
Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
0 convolutional load weights : [0.004]/[245.779] mb
Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
1 convolutional load weights : [0.075]/[245.779] mb
Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
2 convolutional load weights : [0.092]/[245.779] mb
3 route        load weights : [0.092]/[245.779] mb
Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
4 convolutional load weights : [0.108]/[245.779] mb
Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
5 convolutional load weights : [0.117]/[245.779] mb
Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
6 convolutional load weights : [0.188]/[245.779] mb
7 shortcut     load weights : [0.188]/[245.779] mb
Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
8 

In [225]:
##############
## DATALOAD ##
##############
train_dataset = YoloDataset(dataset_option, model_option, split="valid")
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
valid_dataset = YoloDataset(dataset_option, model_option, split="valid")
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

In [234]:
loss_function = YOLOv3Loss()

In [235]:
def train(
        model,
        train_loader,
        loss_func,
        dataset_option,
        model_option,
        epoch,
        # anchors,
        ):
    model.train()

    scales = torch.tensor(model_option["YOLOv3"]["SCALES"]).to(device='cpu')       ## [13, 26, 52]
    anchors = torch.tensor(model_option["YOLOv3"]["ANCHORS"]).to(device='cpu')

    for i, (batch_img, batch_label, batch_img_path) in enumerate(train_loader, 0):
        # batch_size = batch_img.size(0)
        
        #################
        ##  FORWARDING ##
        #################
        pred = model(batch_img)                                                      ### batch_img: tensor(   N, 3, 416, 416) . . . . . . . . . . . N = batch_size
        loss = ( loss_func(pred[0], batch_label[0], scales[0], anchors=anchors[0])    ######## pred: tensor(3, N, 3, S, S, 1 + 4 + class_offset) . . S = scale_size
               + loss_func(pred[1], batch_label[1], scales[1], anchors=anchors[1])    # batch_label: tensor(3, N, 3, S, S, 1 + 4 + class_offset)
               + loss_func(pred[2], batch_label[2], scales[2], anchors=anchors[2]) )  ##### anchors: tensor(3,    3,       2) . . . is list of pairs(anch_w, anch_h)

        #################
        ## BACKWARDING ##
        #################
        loss.backward()

In [236]:
for epoch in range(epochs):
    ###########
    ## TRAIN ##
    ###########
    train(  
            model,
            train_loader,
            loss_function,
            dataset_option,
            model_option,
            epoch,
            # anchors,
          )
        
    #######################
    ## VALID (INFERENCE) ##
    #######################
    acc = valid(
                 model,
                 valid_loader,
                 model_option,
                 epoch,
                 # anchors,
               )

    print(f"Epoch: ({epoch + 1}/{epochs}) . . . [acc: {acc:.2f}]")

torch.Size([64, 3, 19, 19, 85])
torch.Size([4, 3, 19, 19, 90])
torch.Size([0])


IndexError: index 64 is out of bounds for dimension 0 with size 4