In [36]:
# coding='utf-8'
import os
import sys
import numpy as np
import time
import datetime
import json
import importlib
import logging
import shutil
import random

import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.ticker import NullLocator
from PIL import Image
import torch
import torch.nn as nn

from yolo_model import yoloModel
from PASCAL_Dataloader import create_split_loaders
from YOLO_Loss import YoloLoss
from utils import NMS, remove_nulls, convert_to_xywh
from bbox import non_max_suppression

%matplotlib inline

In [2]:
config = {}
config["batch_size"] = 16
config['backbone_name'] = "darknet_53"
config['backbone_pretrained'] = ""
config['anchors'] = [[[116, 90], [156, 198], [373, 326]],
                            [[30, 61], [62, 45], [59, 119]],
                            [[10, 13], [16, 30], [33, 23]]]
config['classes'] = 20
config['img_h'] = config['img_w'] = 416
config['confidence_threshold'] = 0.5
config['nms_threshold'] = 0.4
config['pretrain_snapshot'] = "./states/20190531004422/model.pth" # "./states/20190530222509/model.pth"
config['root_dir'] = './VOC2012'

In [3]:
CLASS_ID = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", \
                "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", \
                "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

In [4]:
_, _, test_loader = create_split_loaders(root_dir=config['root_dir'], batch_size=config['batch_size'])

In [46]:
def test(config, test_loader):
    # Configurations for plots
    if not os.path.isdir("./detected_images/"):
        os.makedirs("./detected_images/")
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i) for i in np.linspace(0, 1, 40)]
    
    # Load and initialize network
    net = yoloModel(config).cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        print('Loading model...')
        logging.info("load checkpoint from {}".format(config["pretrain_snapshot"]))
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        raise Exception("missing pretrain_snapshot!!!")


    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(YoloLoss(config["classes"], (config["img_w"], config["img_h"]), config["anchors"][i]))
        
    # Start inference
    print('Ready for detections!')             
    for batch, sample in enumerate(test_loader):
        if batch >=10:
            print('breaking')
            break
            
        images, labels = sample["image"], sample["label"]
        orig_W, orig_H = sample["orig_img"][0], sample["orig_img"][1]
        target_labels = sample["target_label"]
        imgfiles = sample["imgfile"]
        imgpaths = [os.path.join(config["root_dir"], 'JPEGImages', x) for x in imgfiles]
        
        orig_W = orig_W.to('cpu').numpy()
        orig_H = orig_H.to('cpu').numpy()
        images = images.cuda()
        
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))
            detections = torch.cat(output_list, 1)
            detections = non_max_suppression(detections, 20)

        for ii, detection in enumerate(detections): ## 0 to 15
        # ------------------------------------- #
        # Parsing targets and detections labels #
        # ------------------------------------- #
            print('\nbatch %d, sample %d' %(batch,ii))
            targets = convert_to_xywh(remove_nulls(target_labels[ii,:].to('cpu').numpy())) #70x5
            print('target labels (cls, x, y, w, h)\n', targets)           
            
            if detection is not None:
                # Rescale coordinates to original dimensions
                RATIO_W = orig_W[ii] / config["img_w"]
                RATIO_H = orig_H[ii] / config["img_h"]
            
                predictions = np.zeros((len(detection),5), dtype=int)
                for jj, (x1, y1, x2, y2, conf, cls_conf, cls_pred) in enumerate(detection):        
                    x = int(x1 * RATIO_W)
                    y = int(y1 * RATIO_H)
                    w = int(abs(x2 - x1) * RATIO_W)
                    h = int(abs(y2 - y1) * RATIO_H)
                    predictions[jj,:] = (int(cls_pred), x, y, w, h) 
                print('predicted labels (cls, x, y, w, h)\n', predictions)
                
            else:
                predictions = []
                print('predicted labels (cls, x, y, w, h)\n - No objects detected!!')
        
        # ------------------- #
        # Plot bounding boxes #
        # ------------------- #
            plt.figure()
            fig, ax = plt.subplots(1)
            img = Image.open(imgpaths[ii]).convert('RGB')
            ax.imshow(img)
            
            # -------------------------- #
            # Plotting groundtruth boxes #
            # -------------------------- #
            for jj in range(len(targets)):
                cls, x, y, w, h = targets[jj,0], targets[jj,1], targets[jj,2], targets[jj,3], targets[jj,4]
                color = colors[cls]
                # Create a Rectangle patches
                bbox = patches.Rectangle((x, y), w, h, linewidth=2, \
                                         edgecolor=color, facecolor='none')
                # Add the bbox to the plot
                ax.add_patch(bbox)
                # Add label
                plt.text(x, y, s=CLASS_ID[cls], color='white', \
                         verticalalignment='top',bbox={'color': color, 'pad': 0})
            
            # ------------------------ #
            # Plotting predicted boxes #
            # ------------------------ #
            for jj in range(len(predictions)):
                cls, x, y, w, h = predictions[jj,0], predictions[jj,1], \
                                  predictions[jj,2], predictions[jj,3], predictions[jj,4]
                color = colors[cls+20]
                # Create a Rectangle patches
                bbox = patches.Rectangle((x, y), w, h, linewidth=2, \
                                         edgecolor=color, facecolor='none')
                # Add the bbox to the plot
                ax.add_patch(bbox)
                # Add label
                plt.text(x, y, s=CLASS_ID[cls], color='black', \
                         verticalalignment='top',bbox={'color': color, 'pad': 0})
                
            # Save generated image with detections
            plt.axis('off')
            plt.gca().xaxis.set_major_locator(NullLocator())
            plt.gca().yaxis.set_major_locator(NullLocator())
            plt.savefig('detected_images/{}_{}.jpg'.format(batch, ii), bbox_inches='tight', pad_inches=0.0)
            plt.close()
            


In [47]:
if __name__ == "__main__":
    test(config,test_loader)

Loading model...
Ready for detections!

batch 0, sample 0
target labels (cls, x, y, w, h)
 [[  3  73 133 361  60]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 0, sample 1
target labels (cls, x, y, w, h)
 [[  4 159 289  40 122]
 [ 14   2  42 294 369]
 [  8   2 187  79 183]]
predicted labels (cls, x, y, w, h)
 [[  4 137 309  93  98]]

batch 0, sample 2
target labels (cls, x, y, w, h)
 [[ 13   1   1 499 374]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 0, sample 3
target labels (cls, x, y, w, h)
 [[ 11   5   1 445 309]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 0, sample 4
target labels (cls, x, y, w, h)
 [[ 13 139 116 165 298]
 [ 14  20 128 177 241]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 0, sample 5
target labels (cls, x, y, w, h)
 [[ 10  12  38 473 337]
 [  4 233 145  49 105]
 [  8   1   1 145 132]
 [  8 409 221  91 154]
 [  8 349   1 119  59]
 [ 14 170 164  64  39]]
predicted labels (cls




batch 1, sample 5
target labels (cls, x, y, w, h)
 [[  7  23 192 290 307]
 [  7 116 210  51  52]
 [ 14  51   3 287 497]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 1, sample 6
target labels (cls, x, y, w, h)
 [[  7  69  63 414 178]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 1, sample 7
target labels (cls, x, y, w, h)
 [[  2  13 111 446 193]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 1, sample 8
target labels (cls, x, y, w, h)
 [[ 18  62 142 305  66]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 1, sample 9
target labels (cls, x, y, w, h)
 [[  6  23   1 477 278]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 1, sample 10
target labels (cls, x, y, w, h)
 [[ 17  23 166 119  59]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 1, sample 11
target labels (cls, x, y, w, h)
 [[ 13  33  79 429 181]
 [ 14 157  63 208 208]]
predicted labels (cls, x, y, w, h)
 -


batch 4, sample 7
target labels (cls, x, y, w, h)
 [[ 14  46   3 175 497]
 [  1   1 295  78  99]]
predicted labels (cls, x, y, w, h)
 [[ 14  62   0 136 438]]

batch 4, sample 8
target labels (cls, x, y, w, h)
 [[ 11 145  17 106 153]
 [ 11 410  11  89 138]
 [ 11 325 113 167 262]
 [ 11   1 125 134 239]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 4, sample 9
target labels (cls, x, y, w, h)
 [[  4   9 216 235 284]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 4, sample 10
target labels (cls, x, y, w, h)
 [[ 19  52  27 340 318]]
predicted labels (cls, x, y, w, h)
 [[ 19  76  16 331 338]]

batch 4, sample 11
target labels (cls, x, y, w, h)
 [[ 14   1   1 319 239]
 [ 14  54  37 266 193]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 4, sample 12
target labels (cls, x, y, w, h)
 [[  6   1 149 499 128]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 4, sample 13
target labels (cls, x, y, w, h)
 [[ 18 210  82


batch 7, sample 4
target labels (cls, x, y, w, h)
 [[  0  14  71 469 149]]
predicted labels (cls, x, y, w, h)
 [[  0  43 106 402  80]]

batch 7, sample 5
target labels (cls, x, y, w, h)
 [[ 14 118 148  48 156]
 [ 11 137 253  52  39]]
predicted labels (cls, x, y, w, h)
 [[ 14 118 162  49 124]]

batch 7, sample 6
target labels (cls, x, y, w, h)
 [[ 18  27  62 473 202]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 7, sample 7
target labels (cls, x, y, w, h)
 [[ 11  42  89 404 309]
 [ 15   1   1 135 309]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 7, sample 8
target labels (cls, x, y, w, h)
 [[  7  55 112 281 286]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 7, sample 9
target labels (cls, x, y, w, h)
 [[  0 237 195 154 125]]
predicted labels (cls, x, y, w, h)
 - No objects detected!!

batch 7, sample 10
target labels (cls, x, y, w, h)
 [[ 14  13   5 290 495]
 [ 14 167 240 188 172]]
predicted labels (cls, x, y, w, h)
 - 

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>