In [1]:
import os
import sys
import numpy as np
import time
import datetime
import json
import importlib
import shutil

import torch
import torch.nn as nn

from yolo_model import yoloModel
from yolo_model_dcnn import dcnnyoloModel
from PASCAL_Dataloader import create_split_loaders
from YOLO_Loss import YoloLoss
from utils import NMS
from bbox import bbox_iou

In [None]:
def evaluate(config, architecture='Baseline'):
    
    # Check if your system supports CUDA
    use_cuda = torch.cuda.is_available()
    
    # Setup GPU optimization if CUDA is supported
    if use_cuda:
        computing_device = torch.device("cuda")
        extras = {"num_workers": 3, "pin_memory": True}
        print("CUDA is supported")
    else: # Otherwise, train on the CPU
        computing_device = torch.device("cpu")
        extras = False
        print("CUDA NOT supported")
        
    # Load and initialize network
    if architecture == "Baseline":
        net = yoloModel(config)
    elif architecture == "DeformConv":
        net = dcnnyoloModel(config)
    else:
        print('Error: Incorrect architecture')
    
    net = net.to(computing_device)

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        print("Error: missing pretrain_snapshot")

    # Calculate YOLO loss at 3 different scales
    YOLO_losses = []
    for i in range(3):
        YOLO_losses.append(YoloLoss(config["classes"], (config["img_w"], config["img_h"]), config["anchors"][i]))

    # Test DataLoader
    root_dir = os.getcwd()
    train_loader, val_loader, test_loader = create_split_loaders(config['batch_size'])

    # Start the eval loop
    print("Start eval.")
    count = 0
    correct = 0
    for n, samples in enumerate(test_loader):
        images, labels = samples["image"], samples["label"]
        images, labels = images.to(computing_device), labels.to(computing_device)
        
        with torch.no_grad():
            outputs = net(images)
            list_of_outputs = []
            for i in range(3):
                list_of_outputs.append(YOLO_losses[i](outputs[i]))
            final_output = torch.cat(list_of_outputs, 1)
            final_output = NMS(final_output, config["classes"], conf_thresh=0.2)
            
            #  Calculate mAP
            for i in range(labels.size(0)):
                
                # Get the labels for samples where the width is not zero
                t_samp = labels[i, labels[i, :, 3] != 0]
                for obj_class, t_x, t_y, t_w, t_h in t_samp:
                    count += 1
                    
                    # Obtain rescaled ground truth coordinates
                    t_xmin, t_xmax = config["img_w"] * (t_x - t_w / 2), config["img_w"] * (t_x + t_w / 2)
                    t_ymin, t_ymax = config["img_h"] * (t_y - t_h / 2), config["img_h"] * (t_y + t_h / 2)
                    
                    ground_truth_box = torch.cat([coord.unsqueeze(0) for coord in [t_xmin, t_ymin, t_xmax, t_ymax]]).view(1, -1)
                    ground_truth_box = ground_truth_box.float()
                    samp_pred = final_output[i]
                    if samp_pred is not None:
                        
                        # Find IOU of predictions where the class predicted is same as ground truth
                        for xmin, ymin, xmax, ymax, conf, obj_conf, obj_pred in samp_pred[samp_pred[:, 6] == obj_class.float()]:
                            box_pred = torch.cat([coord.unsqueeze(0) for coord in [xmin, ymin, xmax, ymax]]).view(1, -1)
                            #print('Pred:',box_pred)
                            #print('GT:',box_gt)
                            iou = bbox_iou(box_pred, ground_truth_box)
                            if iou >= config["confidence_threshold"]:
                                correct += 1
                                break
        if count:
            print('Batch [%d/%d] mAP: %.5f' % (n, len(test_loader), float(correct / count)))

    print('Mean Average Precision: %.5f' % float(correct / count))

In [None]:
def main():

    config = {}
    config["batch_size"] = 16
    config['backbone_name'] = "darknet_53"
    config['backbone_pretrained'] = ""
    config['anchors'] = [[[116, 90], [156, 198], [373, 326]],
                                [[30, 61], [62, 45], [59, 119]],
                                [[10, 13], [16, 30], [33, 23]]]
    config['classes'] = 20
    config['img_h'] = config['img_w'] = 416
    config['confidence_threshold'] = 0.5
    config['pretrain_snapshot'] = "deformconv_weights.pth"
    config['classes_names_path'] = "./data/voc.names"

    # Start training
    evaluate(config, architecture='DeformConv')

if __name__ == "__main__":
    main()

CUDA is supported
Start eval.
Batch [0/130] mAP: 0.73529
Batch [1/130] mAP: 0.70000
Batch [2/130] mAP: 0.67153
Batch [3/130] mAP: 0.64216
Batch [4/130] mAP: 0.62348
Batch [5/130] mAP: 0.62162
Batch [6/130] mAP: 0.62029
Batch [7/130] mAP: 0.61942
Batch [8/130] mAP: 0.60731
Batch [9/130] mAP: 0.61588
Batch [10/130] mAP: 0.62351
Batch [11/130] mAP: 0.62289
Batch [12/130] mAP: 0.61217
Batch [13/130] mAP: 0.61011
Batch [14/130] mAP: 0.61265
Batch [15/130] mAP: 0.61029
Batch [16/130] mAP: 0.60328
