In [1]:
# coding='utf-8'
import os
import sys
import numpy as np
import time
import datetime
import json
import importlib
import logging
import shutil
import time

import torch
import torch.nn as nn

from yolo_model import yoloModel
from yolo_model_dcnn import dcnnyoloModel
from PASCAL_Dataloader import create_split_loaders
from YOLO_Loss import YoloLoss
from utils import NMS, remove_nulls, convert_to_xywh
from bbox import non_max_suppression
from detect import plot_detections


In [2]:
config = {}
config["batch_size"] = 16
config['backbone_name'] = "darknet_53"
config['backbone_pretrained'] = ""
config['anchors'] = [[[116, 90], [156, 198], [373, 326]],
                            [[30, 61], [62, 45], [59, 119]],
                            [[10, 13], [16, 30], [33, 23]]]
config['classes'] = 20
config['img_h'] = config['img_w'] = 416
config['confidence_threshold'] = 0.5
config['nms_threshold'] = 0.4
config['root_dir'] = '/datasets/ee285f-public/PascalVOC2012'

'''
UPDATE THESE VALUES!

'''
config['pretrain_snapshot'] = "./states/20190531004422/model.pth" # "./states/20190530222509/model.pth"
config['output_dir'] = './detected_images/'
model = 'Baseline'  

In [3]:
CLASS_ID = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", \
            "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", \
            "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

In [4]:
_, _, test_loader = create_split_loaders(root_dir=config['root_dir'], batch_size=config['batch_size'])
print('There are %d batches in test set. This only runs the first 15 batches.' %len(test_loader))
print('To run the entire test set, set is_break to False')

There are 130 batches in test set. This only runs the first 15 batches.
To run the entire test set, set is_break to False


In [5]:
def test(config, test_loader, output_dir=config['output_dir'], architecture = 'Baseline', is_break=True):
    # Creates the output folder for the results if not already created.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Load and initialize network
    if architecture == "Baseline":
        net = yoloModel(config).cuda()
    elif architecture == "DeformConv":
        net = dcnnyoloModel(config).cuda()
    else:
        raise Exception("Error: Incorrect architecture")

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        print('Loading model...')
        logging.info("load checkpoint from {}".format(config["pretrain_snapshot"]))
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        raise Exception("missing pretrain_snapshot!!!")


    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(YoloLoss(config["classes"], (config["img_w"], config["img_h"]), config["anchors"][i]))
        
    # Start inference
    print('Ready for detections!')             
    for batch, sample in enumerate(test_loader):
        if batch >=15 and is_break:
            print('\nbreaking')
            break
            
        images, labels = sample["image"], sample["label"]
        orig_W, orig_H = sample["orig_img"][0], sample["orig_img"][1]
        target_labels = sample["target_label"]
        imgfiles = sample["imgfile"]
        
        # Create image pathfiles
        imgpaths = [os.path.join(config["root_dir"], 'JPEGImages', x) for x in imgfiles]
        
        orig_W = orig_W.to('cpu').numpy()
        orig_H = orig_H.to('cpu').numpy()
        images = images.cuda()
        
        # Run images through model, get predictions, and apply NMS.
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))
            detections = torch.cat(output_list, 1)
            detections = non_max_suppression(detections, 20)

        for ii, detection in enumerate(detections): ## 0 to 15
        # ------------------------------------- #
        # Parsing targets and detections labels #
        # ------------------------------------- #
            print('\nbatch %d, sample %d' %(batch,ii))
            targets = convert_to_xywh(remove_nulls(target_labels[ii,:].to('cpu').numpy())) #70x5
            print('target labels (cls, x, y, w, h)\n', targets)           
            
            if detection is not None:
                # Rescale coordinates to original dimensions
                RATIO_W = orig_W[ii] / config["img_w"]
                RATIO_H = orig_H[ii] / config["img_h"]
            
                predictions = np.zeros((len(detection),5), dtype=int)
                for jj, (x1, y1, x2, y2, conf, cls_conf, cls_pred) in enumerate(detection):        
                    x = int(x1 * RATIO_W)
                    y = int(y1 * RATIO_H)
                    w = int(abs(x2 - x1) * RATIO_W)
                    h = int(abs(y2 - y1) * RATIO_H)
                    predictions[jj,:] = (int(cls_pred), x, y, w, h) 
                print('predicted labels (cls, x, y, w, h)\n', predictions)
                
            else:
                predictions = []
                print('predicted labels (cls, x, y, w, h) - No objects detected!!')
            
            # ----------------------------------------- #
            # Plot targets and detections bounding boxes #
            # ------------------------------------------ #
            plot_detections(imgpaths[ii], targets, predictions, \
                        batch=batch, sample=ii, output_dir=output_dir, is_demo=False)
    
    print('Finished object detections! Check the results.')




In [6]:
if __name__ == "__main__":
    test(config,test_loader,architecture = model)

Loading model...
Ready for detections!

batch 0, sample 0
target labels (cls, x, y, w, h)
 [[  1 232 174 156 152]]
predicted labels (cls, x, y, w, h)
 [[  1 261 181 117 129]]

batch 0, sample 1
target labels (cls, x, y, w, h)
 [[ 11 202 151 107  73]]
predicted labels (cls, x, y, w, h)
 [[ 11 189 146 113  91]]

batch 0, sample 2
target labels (cls, x, y, w, h)
 [[  6   1 193 147 138]
 [  5 100  32 338 282]
 [ 14 437 206  30  83]
 [ 14 468 210  15  46]
 [ 14 481 209  18  47]]
predicted labels (cls, x, y, w, h)
 [[  6 -26 176 174 148]
 [ 14 455 211  27  53]
 [ 14 441 208  26  56]]

batch 0, sample 3
target labels (cls, x, y, w, h)
 [[  4  95 181  30  92]
 [ 14  49  71  99 174]
 [ 14 248 115 125 117]
 [ 14 318  96 182 230]
 [ 10   1 205 410 121]]
predicted labels (cls, x, y, w, h)
 [[ 14  47  70 106 179]
 [ 14 336 106 132 206]
 [ 14 236 107 120 117]]

batch 0, sample 4
target labels (cls, x, y, w, h)
 [[ 13  73 102 370 254]
 [ 14 199  56 147 298]
 [  6 449  52  38  14]
 [  6 409  58  23   




batch 1, sample 4
target labels (cls, x, y, w, h)
 [[  2 264 153  35  47]
 [  2 207 173  57  30]
 [  2 184 148  33  43]
 [  2  84 195  22  33]
 [  2 129 143  31  52]
 [  2 152 156  29  36]]
predicted labels (cls, x, y, w, h) - No objects detected!!

batch 1, sample 5
target labels (cls, x, y, w, h)
 [[  2 336 224  38  67]
 [  2 134 234  96  51]
 [  2 174 176  78  43]
 [  2  46 169  75  49]
 [  2 398  44  80  50]
 [  2 142 121  97  41]
 [  2  79  93  82  40]
 [  2   1 104  34  50]
 [  2   1 242  46  46]
 [  2 221 284  78  76]
 [  2 369 146  38  51]
 [  2 445 222  55  26]
 [  2 247  15  51  39]]
predicted labels (cls, x, y, w, h)
 [[  2  82 108  37  37]
 [  2 114 110  42  34]
 [  2 399  47  77  49]
 [  2 260  22  46  37]
 [  2  67 161  47  56]]

batch 1, sample 6
target labels (cls, x, y, w, h)
 [[  3  73 133 361  60]]
predicted labels (cls, x, y, w, h)
 [[   3 -164   95  834  128]]

batch 1, sample 7
target labels (cls, x, y, w, h)
 [[ 11  42  89 404 309]
 [ 15   1   1 135 309]]
predic


batch 3, sample 15
target labels (cls, x, y, w, h)
 [[  2  93  90 232 285]]
predicted labels (cls, x, y, w, h) - No objects detected!!

batch 4, sample 0
target labels (cls, x, y, w, h)
 [[  1  93  67 332 227]]
predicted labels (cls, x, y, w, h) - No objects detected!!

batch 4, sample 1
target labels (cls, x, y, w, h)
 [[ 13 337 196  68  45]
 [ 14 294 184  22  58]
 [ 14 191 216  46  58]
 [ 14 276 180  19  62]
 [  6 146 162 130  94]]
predicted labels (cls, x, y, w, h)
 [[  6 232 196  34  35]
 [ 13 342 193  49  46]
 [ 14 281 183  31  50]]

batch 4, sample 2
target labels (cls, x, y, w, h)
 [[  7 100 112 175 366]]
predicted labels (cls, x, y, w, h)
 [[  7 102 122 127 340]]

batch 4, sample 3
target labels (cls, x, y, w, h)
 [[ 14 178 103 100 255]
 [ 14 273  88  94 267]
 [ 19  35 196  69  71]
 [  8 343 228  56 102]
 [  8 241 221  49 106]]
predicted labels (cls, x, y, w, h) - No objects detected!!

batch 4, sample 4
target labels (cls, x, y, w, h)
 [[ 17   1 160 161 215]
 [  8 117 129  95


batch 7, sample 3
target labels (cls, x, y, w, h)
 [[  1   8   1 367 470]]
predicted labels (cls, x, y, w, h) - No objects detected!!

batch 7, sample 4
target labels (cls, x, y, w, h)
 [[ 14   1   1 434 374]]
predicted labels (cls, x, y, w, h)
 [[  14 -131  -49  757  471]]

batch 7, sample 5
target labels (cls, x, y, w, h)
 [[ 14   1  80 300 295]
 [ 14 178  64 322 311]
 [ 19  28  98  42  34]]
predicted labels (cls, x, y, w, h)
 [[ 14 200  65 250 305]]

batch 7, sample 6
target labels (cls, x, y, w, h)
 [[  7  22   9 449 453]]
predicted labels (cls, x, y, w, h)
 [[  7 118  82 259 311]]

batch 7, sample 7
target labels (cls, x, y, w, h)
 [[  2 330  70 154  92]
 [  2  23  47 141  96]]
predicted labels (cls, x, y, w, h)
 [[  2  29  54 128 105]]

batch 7, sample 8
target labels (cls, x, y, w, h)
 [[  7  26  28 395 294]]
predicted labels (cls, x, y, w, h) - No objects detected!!

batch 7, sample 9
target labels (cls, x, y, w, h)
 [[ 17 128 159 372 147]
 [  8 283 184 106 155]
 [  8  18 174 


batch 10, sample 3
target labels (cls, x, y, w, h)
 [[  1 111   1 264 499]]
predicted labels (cls, x, y, w, h) - No objects detected!!

batch 10, sample 4
target labels (cls, x, y, w, h)
 [[ 14  24 119 241 178]]
predicted labels (cls, x, y, w, h) - No objects detected!!

batch 10, sample 5
target labels (cls, x, y, w, h)
 [[ 19 108 170 206 179]
 [  4  66 254  49 116]
 [  4 209  51  25  64]]
predicted labels (cls, x, y, w, h)
 [[ 19  70 101 282 295]]

batch 10, sample 6
target labels (cls, x, y, w, h)
 [[ 11  87   1 402 374]]
predicted labels (cls, x, y, w, h)
 [[ 11 103  16 370 344]]

batch 10, sample 7
target labels (cls, x, y, w, h)
 [[ 11 247 282 103  61]]
predicted labels (cls, x, y, w, h) - No objects detected!!

batch 10, sample 8
target labels (cls, x, y, w, h)
 [[  7  54  42 446 333]]
predicted labels (cls, x, y, w, h) - No objects detected!!

batch 10, sample 9
target labels (cls, x, y, w, h)
 [[ 14   1 109 233 266]]
predicted labels (cls, x, y, w, h)
 [[ 14 165 137  83 147]



batch 13, sample 1
target labels (cls, x, y, w, h)
 [[ 14 234  25 113 230]
 [ 14 144  56  91 117]
 [ 13 101 140 331 178]
 [ 13  31 133 123 153]]
predicted labels (cls, x, y, w, h) - No objects detected!!

batch 13, sample 2
target labels (cls, x, y, w, h)
 [[ 14  93  28 407 372]]
predicted labels (cls, x, y, w, h) - No objects detected!!

batch 13, sample 3
target labels (cls, x, y, w, h)
 [[ 14 187 123 120 377]
 [ 14  93 167 116 318]
 [  8 274 267  87 146]
 [ 15  33 251  38  90]
 [ 15   1 191  38 148]]
predicted labels (cls, x, y, w, h)
 [[ 14  88 155 128 336]
 [ 15  22 250  34  79]]

batch 13, sample 4
target labels (cls, x, y, w, h)
 [[ 19 251  41 175 135]
 [ 19  17  41 176 140]
 [ 14   1 175  32 116]
 [ 14 289 183  83  84]]
predicted labels (cls, x, y, w, h)
 [[ 14  -4 181  54 183]
 [ 14 247 152 153 135]
 [ 14 310 183  48  79]]

batch 13, sample 5
target labels (cls, x, y, w, h)
 [[ 15 387  99  98 131]
 [ 15 368 185  64  50]
 [ 15 244 168  29  29]
 [ 15 255 110  78  72]
 [  8 267 

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>