In [None]:
import os
from tqdm import tqdm
import argparse

import sys
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname("sample_train.ipynb"))))
from common.parser import yaml_parser
from common.recoder import save_checkpoint
from common.cocoeval4yolo import CocoEval
# from common.utils import compress_label_map
from data.yolo_dataset import *
# from model.loss import YOLOv3Loss
from model.model import Darknet4YOLOv3
from model.darknet2pytorch import DarknetParser, yolo_forward_dynamic

import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter



In [None]:
from common.utils import coord_IOU


class YOLOv3Loss(nn.Module):
    """YOLOv3 training loss for a single detection scale.

    The loss is the average of four terms:

    * no_obj_loss -- objectness loss of boxes with no object assigned
    * is_obj_loss -- objectness loss of boxes with an object assigned
    * coord_loss  -- (x, y, w, h) loss of boxes with an object assigned
    * class_loss  -- class-score loss of boxes with an object assigned

    The last axis of ``pred`` / ``target`` is read as
    ``[x, y, w, h, objectness, class scores...]``.
    """

    def __init__(self):
        super().__init__()
        # All criteria keep per-element values; reduction is done by get_sum().
        self.mse = nn.MSELoss(reduction='none')
        self.bce = nn.BCEWithLogitsLoss(reduction='none')
        self.multiMargin = nn.MultiLabelSoftMarginLoss(reduction='none')        ## https://cvml.tistory.com/26

    def forward(self, pred, target, scale, anchors, logger, n_iter):
        """Compute the loss of one scale and log each term.

        Args:
            pred: raw network output for this scale. Assumed to be
                (N, 3, S, S, 5 + num_classes) -- TODO confirm against the model;
                this matches the anchors being broadcast as (1, 3, 1, 1, 2).
            target: label map laid out like ``pred``. ``target[..., 4]`` is
                compared against exactly 1 (assigned) and exactly 0
                (unassigned); any other value is ignored by both masks.
            scale: multiplier applied to the target width/height.
            anchors: the three (w, h) anchor pairs of this scale.
            logger: object exposing ``add_scalar(tag, value, step)``.
            n_iter: global step used for logging.

        Returns:
            Scalar tensor. If no box is assigned, only the no-object term is
            returned; otherwise the mean of the four terms.
        """
        is_assigned = target[..., 4] == 1     ## boolean mask of object-assigned boxes
        no_assigned = target[..., 4] == 0     ## boolean mask of boxes without an object

        no_obj_loss = self.get_loss(pred[..., 4:5][no_assigned], target[..., 4:5][no_assigned], opt="NO_OBJ")
        no_obj_loss = get_sum(no_obj_loss)

        logger.add_scalar('train/no_obj_loss', no_obj_loss.item(), n_iter)
        if not is_assigned.any():
            # NOTE(review): this early return is not divided by 4 like the
            # full loss below -- confirm the intended scale.
            return no_obj_loss

        ## Decode predictions into the same space as the (scaled) targets before computing IoU.
        anchors = anchors.reshape(1, 3, 1, 1, 2)
        scaled_pred = torch.cat([torch.sigmoid(pred[..., :2]), torch.exp(pred[..., 2:4]) * anchors, pred[..., 4:5]], dim=-1)
        scaled_target = torch.cat([target[..., :2], target[..., 2:4] * scale, target[..., 4:5]], dim=-1)

        is_obj_loss = self.get_loss(scaled_pred[is_assigned], scaled_target[is_assigned], opt="IS_OBJ")
        # NOTE(review): the coordinate loss compares the raw (undecoded)
        # predictions with the unscaled targets -- confirm this matches how the
        # label maps are encoded.
        coord_loss = self.get_loss(pred[..., 0:4][is_assigned], target[..., 0:4][is_assigned], opt="COORD")
        class_loss = self.get_loss(pred[..., 5:][is_assigned], target[..., 5:][is_assigned], opt="CLASS")

        is_obj_loss = get_sum(is_obj_loss)
        coord_loss = get_sum(coord_loss)
        class_loss = get_sum(class_loss)

        total_loss = (no_obj_loss
                    + is_obj_loss
                    + coord_loss
                    + class_loss) / 4

        logger.add_scalar('train/is_obj_loss', is_obj_loss.item(), n_iter)
        logger.add_scalar('train/coord_loss', coord_loss.item(), n_iter)
        logger.add_scalar('train/class_loss', class_loss.item(), n_iter)

        return total_loss

    def get_loss(self, pred, target, opt):
        """Return the un-reduced loss for one term.

        Args:
            pred: predictions already filtered by the relevant mask.
            target: targets filtered by the same mask.
            opt: one of ``"NO_OBJ"``, ``"IS_OBJ"``, ``"COORD"``, ``"CLASS"``.

        Returns:
            Per-element (``CLASS``: per-row) loss tensor.

        Raises:
            ValueError: if ``opt`` is not one of the four known terms.
        """
        if opt == "NO_OBJ":
            # BCEWithLogitsLoss applies the sigmoid itself, so it must be fed
            # the raw logits; squashing them first would apply sigmoid twice.
            return self.bce(pred, target)

        elif opt == "IS_OBJ":
            ## The objectness target is the IoU between the predicted box and
            ## the ground-truth box, which gives a continuous target instead of
            ## a hard 0/1 label.
            # NOTE(review): pred[..., 4:5] is still a raw logit here while the
            # IoU target is presumably in [0, 1] -- confirm whether a sigmoid
            # is intended before the MSE.
            iou = coord_IOU(pred[..., 0:4], target[..., 0:4])
            return self.mse(pred[..., 4:5], iou * target[..., 4:5])

        elif opt == "COORD":
            return self.mse(pred, target) / 4

        elif opt == "CLASS":
            num_classes = target.shape[-1]
            return self.multiMargin(pred.reshape(-1, num_classes), target.reshape(-1, num_classes))

        raise ValueError(f"unknown loss option: {opt!r}")


def get_sum(loss):
    """Sum ``loss`` and average it over its first dimension.

    When the first dimension is empty the plain sum is returned, which avoids
    a division by zero.
    """
    total = loss.sum()
    count = loss.shape[0]
    if count == 0:
        return total
    return total / count

In [None]:
def train(
        model,
        train_loader,
        loss_func,
        optimizer,
        optim_option,
        model_option,
        device,
        epoch,
        lr_scheduler,
        logger
        ):
    """Run one training epoch.

    Args:
        model: network called as ``model(batch_img)``; its output is indexed
            ``pred[0..2]``, one entry per detection scale.
        train_loader: iterable of batches, each a dict with an ``"img"`` tensor
            and a ``"label_map"`` sequence of per-scale label tensors.
        loss_func: callable
            ``(pred, target, scale, anchors, logger, n_iter) -> scalar tensor``.
        optimizer: optimizer updating ``model``'s parameters.
        optim_option: dict providing ``["OPTIMIZER"]["ITERS_PER_EPOCH"]`` and
            ``["OPTIMIZER"]["LR"]``.
        model_option: dict providing ``["YOLOv3"]["SCALES"]`` and
            ``["YOLOv3"]["ANCHORS"]``.
        device: device the batch and the scale/anchor tensors are moved to.
        epoch: zero-based epoch index, used to derive the global iteration.
        lr_scheduler: scheduler stepped once per iteration after warm-up.
        logger: object exposing ``add_scalar(tag, value, step)``.
    """
    model.train()

    scales = torch.tensor(model_option["YOLOv3"]["SCALES"]).to(device)       ## [19, 38, 76]
    anchors = torch.tensor(model_option["YOLOv3"]["ANCHORS"]).to(device)

    for i, batch_input in enumerate(tqdm(train_loader, desc="train")):
        n_iteration = (optim_option["OPTIMIZER"]["ITERS_PER_EPOCH"] * epoch) + i

        batch_img = batch_input["img"].to(device)
        batch_label = [label.to(device) for label in batch_input["label_map"]]

        #################
        ##  FORWARDING ##
        #################
        # NOTE(review): autocast is enabled but the backward pass below does
        # not use a GradScaler -- confirm this is intended.
        with torch.cuda.amp.autocast():
            pred = model(batch_img)                                                       ### batch_img: tensor(   N, 3, 608, 608) . . . . . . . . . . . N = batch_size
            loss = ( loss_func(pred[0], batch_label[0], scales[0], anchors[0], logger, n_iteration)    ######## pred: tensor(3, N, 3, S, S, 1 + 4 + class_offset) . . S = scale_size
                   + loss_func(pred[1], batch_label[1], scales[1], anchors[1], logger, n_iteration)    # batch_label: tensor(3, N, 3, S, S, 1 + 4 + class_offset)
                   + loss_func(pred[2], batch_label[2], scales[2], anchors[2], logger, n_iteration) )  ##### anchors: tensor(3,    3,       2) . . . is list of pairs(anch_w, anch_h)

        # Average over the three detection scales.
        total_loss = loss / 3

        logger.add_scalar('train/total_loss', total_loss.item(), n_iteration)

        #################
        ## BACKWARDING ##
        #################
        # Gradients accumulate by default; clear the previous iteration's
        # gradients so each step only uses the current batch.
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if n_iteration > 500:
            lr_scheduler.step()
        else:
            # Linear warm-up driven by the global iteration count.
            # NOTE(review): the ramp divides by 100 but runs until iteration
            # 500, so it peaks at 5x the configured LR -- confirm the constants.
            lr = optim_option["OPTIMIZER"]["LR"] * float(n_iteration) / 100
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

In [None]:
def valid(
        model,
        valid_loader,
        model_option,
        device,
        epoch,
        logger
        ):
    """Evaluate ``model`` on ``valid_loader`` and log the resulting mAP.

    Args:
        model: network called as ``model(batch_img)``.
        valid_loader: iterable of batch dicts; each must contain an ``"img"``
            tensor. The whole dict is forwarded to the evaluator.
        model_option: unused here; kept so existing callers keep working.
        device: device the image batch is moved to.
        epoch: step under which the metric is logged.
        logger: object exposing ``add_scalar(tag, value, step)``.

    Returns:
        The value returned by ``CocoEval.eval()``, logged as ``test/map``.
    """
    model.eval()

    coco_evaluator = CocoEval()

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for batch_input in tqdm(valid_loader, desc="valid"):
            batch_img = batch_input["img"].to(device)

            pred = model(batch_img)
            coco_evaluator.update(batch_input, pred)

    ## Examine Accuracy
    mean_average_precision = coco_evaluator.eval()
    logger.add_scalar('test/map', mean_average_precision, epoch)

    return mean_average_precision


In [None]:
import easydict

# Root of the YOLO-v3 checkout. Every path below is derived from it, so the
# notebook can run on another machine by setting the YOLO_V3_ROOT environment
# variable; when it is unset the original location is used.
PROJECT_ROOT = os.environ.get("YOLO_V3_ROOT", "C:/Users/ryyoon/RY_GitHub/YOLO-v3")

args = easydict.EasyDict({
    "config": f"{PROJECT_ROOT}/configs/model/yolov3.cfg",
    # "config": f"{PROJECT_ROOT}/configs/darknet/yolov4.cfg",
    "weight": f"{PROJECT_ROOT}/configs/darknet/yolov4.weights",
    "dataset": f"{PROJECT_ROOT}/configs/dataset/yolo_dataset.yml",
    "model": f"{PROJECT_ROOT}/configs/model/yolo_model.yml",
    "optimizer": f"{PROJECT_ROOT}/configs/optimizer/optimizer.yml",
    "weight_save_dir": f"{PROJECT_ROOT}/weights"
})


# Parsed option dictionaries used by the remaining cells.
dataset_option = yaml_parser(args.dataset)
model_option = yaml_parser(args.model)
optimizer_option = yaml_parser(args.optimizer)

In [None]:
######################
## BUILD DATALOADER ##
######################
# train_set_num, train_loader, _ = build_DataLoader(dataset_option, model_option, optimizer_option)

train_dataset = YoloDataset(dataset_option, model_option, split="train")
valid_dataset = YoloDataset(dataset_option, model_option, split="valid")

# Training: shuffle every epoch and drop the incomplete last batch, which keeps
# the iteration count equal to len(train_dataset) // BATCH_SIZE.
train_loader = DataLoader(train_dataset,
                          batch_size=optimizer_option["OPTIMIZER"]["BATCH_SIZE"],
                          shuffle=True,
                          collate_fn=collate_fn,
                          drop_last=True)

# Validation: evaluate every sample in a fixed order. Shuffling combined with
# drop_last would score a different, truncated subset each epoch, so the
# metric would not be comparable between epochs.
valid_loader = DataLoader(valid_dataset,
                          batch_size=optimizer_option["OPTIMIZER"]["BATCH_SIZE"],
                          shuffle=False,
                          collate_fn=collate_fn,
                          drop_last=False)

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

###########################
## BUILD MODEL & LOSS_fn ##
###########################
# Build the network from the cfg file, move it to the target device and wrap
# it in DataParallel.
model = torch.nn.DataParallel(Darknet4YOLOv3(args.config).to(device))

# Loss defined in this notebook.
loss_function = YOLOv3Loss()

In [None]:
# A DataParallel wrapper keeps the real network in `.module`.
if isinstance(model, nn.DataParallel):
    param_groups = model.module.parameters()
else:
    param_groups = model.parameters()

optimizer = torch.optim.Adam(param_groups, lr=optimizer_option["OPTIMIZER"]["LR"])

# Start every parameter group at a learning rate of zero.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.

# Number of full batches per epoch, stored in the shared option dict.
optimizer_option["OPTIMIZER"]["ITERS_PER_EPOCH"] = (
    len(train_dataset) // optimizer_option["OPTIMIZER"]["BATCH_SIZE"]
)

In [None]:
# total_iter = optimizer_option["OPTIMIZER"]["ITERS_PER_EPOCH"] * optimizer_option["OPTIMIZER"]["EPOCHS"]
total_iter = optimizer_option["OPTIMIZER"]["ITERS_PER_EPOCH"] * 10

# LambdaLR multiplies each group's `initial_lr` by lr_lambda(step). When that
# key is missing it is taken from the group's *current* lr, which an earlier
# cell set to 0 for warm-up; the scheduler would then pin the learning rate at
# 0 forever. Record the configured LR as the base value explicitly.
for param_group in optimizer.param_groups:
    param_group['initial_lr'] = optimizer_option["OPTIMIZER"]["LR"]

# NOTE(review): the training loop steps this scheduler once per iteration, so
# the 0.95 factor is applied per iteration rather than per epoch -- confirm
# the intended decay rate.
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer,
    lr_lambda=lambda epoch: 0.95 ** epoch
)

# Building the scheduler writes the base LR into the optimizer; put the groups
# back to 0 so training still starts from a zero learning rate.
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.

# Creates scaler once at the beginning of training
scaler = torch.cuda.amp.GradScaler()

logger = SummaryWriter()

# Make sure the checkpoint directory exists.
os.makedirs(args.weight_save_dir, exist_ok=True)

In [None]:
# epochs = optimizer_option["OPTIMIZER"]["EPOCHS"]
epochs = 9

# One pass per epoch: train, validate, report the metric, save a checkpoint.
for epoch in range(epochs):
    ###########
    ## TRAIN ##
    ###########
    train(
        model=model,
        train_loader=train_loader,
        loss_func=loss_function,
        optimizer=optimizer,
        optim_option=optimizer_option,
        model_option=model_option,
        device=device,
        epoch=epoch,
        lr_scheduler=lr_scheduler,
        logger=logger,
    )

    ###########
    ## VALID ##
    ###########
    mAP = valid(
        model=model,
        valid_loader=valid_loader,
        model_option=model_option,
        device=device,
        epoch=epoch,
        logger=logger,
    )

    print(f"Epoch: ({epoch + 1}/{epochs}) . . . [mAP: {mAP}]")

    # Persist the model and optimizer together with this epoch's metric.
    save_checkpoint(epoch, mAP, model, optimizer, path=args.weight_save_dir)


In [None]:
# for epoch in range(3):
#     train(
#         model,
#         train_loader,
#         loss_function,
#         optimizer,
#         optimizer_option,
#         model_option,
#         device,
#         epoch,
#         logger
#     )

# model.eval()

# scales = torch.tensor(model_option["YOLOv3"]["SCALES"]).to(device)       ## [19, 38, 76]
# anchors = torch.tensor(model_option["YOLOv3"]["ANCHORS"]).to(device)
# coco_evaluator = CocoEval()

# with torch.no_grad():
#     # for i, batch_input in enumerate(valid_loader, 0):
#     for i, batch_input in enumerate(tqdm(valid_loader, desc="valid")):
#         batch_img = batch_input["img"].to(device)
#         batch_size = len(batch_input["img_path"])

#         pred = model(batch_img)
#         coco_evaluator.update(batch_input, pred)

# # Examine Accuracy
# mean_average_precision = coco_evaluator.eval()
# logger.add_scalar('test/map', mean_average_precision, 1)
# print(f"Epoch: ({1 + 0}/{1}) . . . [mAP: {mean_average_precision:.2f}]")
