In [1]:
import os
import pickle
from itertools import product as product

import cv2  # OpenCV library
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.utils.data as data
from torch.autograd import Function

%matplotlib inline

In [2]:
# import dataset
from utils.dataset import VOCDataset, DatasetTransform, make_datapath_list, Anno_xml2list, od_collate_fn

In [3]:
# Dataset locations -- edit these to point at your own VOCdevkit checkout.
DEVKIT_PATH = "../VOCdevkit/"
vocpath = "../VOCdevkit/VOC2007"
SET = "test"  # NOTE(review): presumably the image-set split to evaluate; not used in this cell -- confirm

# make_datapath_list returns four lists: image/annotation paths for train and val.
(
    train_img_list,
    train_anno_list,
    val_img_list,
    val_anno_list,
) = make_datapath_list(vocpath)

In [15]:
# Peek at the first ten validation image paths.
val_img_list[:10]

['../VOCdevkit/VOC2007/JPEGImages/000001.jpg',
 '../VOCdevkit/VOC2007/JPEGImages/000002.jpg',
 '../VOCdevkit/VOC2007/JPEGImages/000003.jpg',
 '../VOCdevkit/VOC2007/JPEGImages/000004.jpg',
 '../VOCdevkit/VOC2007/JPEGImages/000006.jpg',
 '../VOCdevkit/VOC2007/JPEGImages/000008.jpg',
 '../VOCdevkit/VOC2007/JPEGImages/000010.jpg',
 '../VOCdevkit/VOC2007/JPEGImages/000011.jpg',
 '../VOCdevkit/VOC2007/JPEGImages/000013.jpg',
 '../VOCdevkit/VOC2007/JPEGImages/000014.jpg']

In [14]:
# Peek at the first ten validation annotation paths.
val_anno_list[:10]

['../VOCdevkit/VOC2007/Annotations/000001.xml',
 '../VOCdevkit/VOC2007/Annotations/000002.xml',
 '../VOCdevkit/VOC2007/Annotations/000003.xml',
 '../VOCdevkit/VOC2007/Annotations/000004.xml',
 '../VOCdevkit/VOC2007/Annotations/000006.xml',
 '../VOCdevkit/VOC2007/Annotations/000008.xml',
 '../VOCdevkit/VOC2007/Annotations/000010.xml',
 '../VOCdevkit/VOC2007/Annotations/000011.xml',
 '../VOCdevkit/VOC2007/Annotations/000013.xml',
 '../VOCdevkit/VOC2007/Annotations/000014.xml']

In [6]:
# Image IDs for the validation set: each path's file name with the directory
# and extension stripped (e.g. '.../JPEGImages/000001.jpg' -> '000001').
# os.path is used instead of a fixed-width slice so that stems of any length
# and extensions of any length are handled correctly.
image_index = [
    os.path.splitext(os.path.basename(img_path))[0]
    for img_path in val_img_list
]

# Show the first ID as a quick sanity check.
image_index[0]

'000001'

In [7]:
# Object category names, in the order handed to Anno_xml2list below.
class_names = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]
input_size = 300  # input images are resized to 300x300
color_mean = (104, 117, 123)  # mean color values, in (B, G, R) order

# Apply DatasetTransform: build the image transform and the annotation converter.
transform = DatasetTransform(input_size, color_mean)
transform_anno = Anno_xml2list(class_names)

In [8]:
# Validation dataset. Reuses the `transform` / `transform_anno` objects that
# were already built from (input_size, color_mean) and class_names, instead of
# constructing a second, identical pair here.
val_dataset = VOCDataset(
    val_img_list,
    val_anno_list,
    phase="val",
    transform=transform,
    transform_anno=transform_anno,
)

In [16]:
# One image per batch, in dataset order (shuffle disabled).
# od_collate_fn is the project's collate function for detection batches.
val_dataloader = data.DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=1,
    collate_fn=od_collate_fn,
)

# set up model

In [17]:
from utils.feature_piramid_network import FPNSSD as SSD

# Object categories the detector is evaluated on (background excluded).
voc_classes = ['aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair',
               'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

# SSD300 configuration
ssd_cfg = {
    # total number of classes including the background class (20 + 1 = 21)
    'num_classes': len(voc_classes) + 1,
    'input_size': 300,  # input image size
    'bbox_aspect_num': [4, 6, 6, 6, 4, 4],  # number of default-box aspect variants per source
    'feature_maps': [38, 19, 10, 5, 3, 1],  # feature-map size of each source
    'steps': [8, 16, 32, 64, 100, 300],  # determines the size of the default boxes
    'min_sizes': [30, 60, 111, 162, 213, 264],  # determines the size of the default boxes
    'max_sizes': [60, 111, 162, 213, 264, 315],  # determines the size of the default boxes
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

# SSD network model, built in inference phase and switched to eval mode.
net = SSD(phase="inference", cfg=ssd_cfg).eval()

# Load the trained SSD weights.
# map_location="cpu" lets a checkpoint that was saved from CUDA tensors be
# loaded on a machine without a GPU; the model is moved to `device` below.
# The checkpoint's optimizer state is not needed for inference and is ignored.
checkpoint = torch.load("./weights/ssd_fpn_300_200.pth", map_location="cpu")
net.load_state_dict(checkpoint["state_dict"])
epoch = checkpoint['epoch']  # epoch value stored in the checkpoint
print('loaded the trained weights')

# Prefer the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using:", device)

net = net.to(device)

loaded the trained weights
using: cuda:0


In [18]:
# Minimum confidence passed to the inference helper as data_confidence_level.
bbox_threshold = 0.05
all_imgs = []
classes = {}

# define detections
# all_boxes[c][i] starts as an empty list for class index c (21 classes,
# the same count as ssd_cfg['num_classes']) and validation image index i.
all_boxes = [
    [[] for _img in range(len(val_img_list))]
    for _cls in range(21)
]
# Empty float array of shape (0, 5): five empty rows, transposed.
empty_array = np.array([[], [], [], [], []]).T

In [19]:
from utils.ssd_predict_show import SSDPredictShow

# Wrap the loaded network in the project's prediction helper.
ssd = SSDPredictShow(
    eval_categories=voc_classes,
    net=net,
    device=device,
)

cuda:0


# infer images

In [None]:
# Run the detector over the validation loader; the returned structure
# replaces all_boxes. bbox_threshold is passed as the confidence cut-off.
all_boxes = ssd.ssd_inference(
    val_dataloader,
    all_boxes,
    data_confidence_level=bbox_threshold,
)

../VOCdevkit/VOC2007/JPEGImages/000001.jpg
../VOCdevkit/VOC2007/JPEGImages/000002.jpg
../VOCdevkit/VOC2007/JPEGImages/000003.jpg
../VOCdevkit/VOC2007/JPEGImages/000004.jpg
../VOCdevkit/VOC2007/JPEGImages/000006.jpg
../VOCdevkit/VOC2007/JPEGImages/000008.jpg
../VOCdevkit/VOC2007/JPEGImages/000010.jpg
(1, 21, 200, 5)
iter: 0
sort boxes. detection was 0.03607940673828125 and post took 0.00015115737915039062 and allboxappend took 0.006963253021240234
(1, 21, 200, 5)
iter: 1
sort boxes. detection was 0.01636981964111328 and post took 7.152557373046875e-05 and allboxappend took 0.0064983367919921875
(1, 21, 200, 5)
iter: 2
sort boxes. detection was 0.05432271957397461 and post took 9.775161743164062e-05 and allboxappend took 0.006556987762451172
(1, 21, 200, 5)
iter: 3
sort boxes. detection was 0.05200767517089844 and post took 0.00010967254638671875 and allboxappend took 0.006685018539428711
../VOCdevkit/VOC2007/JPEGImages/000011.jpg
../VOCdevkit/VOC2007/JPEGImages/000013.jpg
../VOCdevkit/V

../VOCdevkit/VOC2007/JPEGImages/000090.jpg
../VOCdevkit/VOC2007/JPEGImages/000092.jpg
../VOCdevkit/VOC2007/JPEGImages/000094.jpg
../VOCdevkit/VOC2007/JPEGImages/000096.jpg
../VOCdevkit/VOC2007/JPEGImages/000097.jpg
../VOCdevkit/VOC2007/JPEGImages/000098.jpg
../VOCdevkit/VOC2007/JPEGImages/000100.jpg
(1, 21, 200, 5)
iter: 45
sort boxes. detection was 0.06113123893737793 and post took 0.00010275840759277344 and allboxappend took 0.006566762924194336
(1, 21, 200, 5)
iter: 46
sort boxes. detection was 0.03661465644836426 and post took 6.580352783203125e-05 and allboxappend took 0.006451845169067383
(1, 21, 200, 5)
iter: 47
sort boxes. detection was 0.017416954040527344 and post took 6.461143493652344e-05 and allboxappend took 0.006446123123168945
(1, 21, 200, 5)
iter: 48
sort boxes. detection was 0.025933504104614258 and post took 7.534027099609375e-05 and allboxappend took 0.0064563751220703125
(1, 21, 200, 5)
iter: 49
sort boxes. detection was 0.011776447296142578 and post took 6.1511993

../VOCdevkit/VOC2007/JPEGImages/000185.jpg
../VOCdevkit/VOC2007/JPEGImages/000186.jpg
../VOCdevkit/VOC2007/JPEGImages/000188.jpg
(1, 21, 200, 5)
iter: 87
sort boxes. detection was 0.07254552841186523 and post took 0.00011110305786132812 and allboxappend took 0.0065724849700927734
(1, 21, 200, 5)
iter: 88
sort boxes. detection was 0.06258940696716309 and post took 9.894371032714844e-05 and allboxappend took 0.006547212600708008
(1, 21, 200, 5)
iter: 89
sort boxes. detection was 0.06661701202392578 and post took 9.322166442871094e-05 and allboxappend took 0.006552457809448242
(1, 21, 200, 5)
iter: 90
sort boxes. detection was 0.016502857208251953 and post took 7.033348083496094e-05 and allboxappend took 0.006425380706787109
(1, 21, 200, 5)
iter: 91
sort boxes. detection was 0.010947465896606445 and post took 6.628036499023438e-05 and allboxappend took 0.0064697265625
../VOCdevkit/VOC2007/JPEGImages/000191.jpg
../VOCdevkit/VOC2007/JPEGImages/000195.jpg
../VOCdevkit/VOC2007/JPEGImages/0001

../VOCdevkit/VOC2007/JPEGImages/000277.jpg
../VOCdevkit/VOC2007/JPEGImages/000279.jpg
../VOCdevkit/VOC2007/JPEGImages/000280.jpg
../VOCdevkit/VOC2007/JPEGImages/000281.jpg
../VOCdevkit/VOC2007/JPEGImages/000283.jpg
iter: 133
sort boxes. detection was 0.018732786178588867 and post took 5.8650970458984375e-05 and allboxappend took 0.006651163101196289
(1, 21, 200, 5)
iter: 134
sort boxes. detection was 0.022764921188354492 and post took 5.9604644775390625e-05 and allboxappend took 0.006448984146118164
(1, 21, 200, 5)
iter: 135
sort boxes. detection was 0.011121749877929688 and post took 6.031990051269531e-05 and allboxappend took 0.006485939025878906
(1, 21, 200, 5)
iter: 136
sort boxes. detection was 0.04390215873718262 and post took 9.608268737792969e-05 and allboxappend took 0.0064547061920166016
(1, 21, 200, 5)
iter: 137
sort boxes. detection was 0.023571491241455078 and post took 7.82012939453125e-05 and allboxappend took 0.006509542465209961
../VOCdevkit/VOC2007/JPEGImages/000284.j

../VOCdevkit/VOC2007/JPEGImages/000365.jpg
../VOCdevkit/VOC2007/JPEGImages/000366.jpg
../VOCdevkit/VOC2007/JPEGImages/000368.jpg
../VOCdevkit/VOC2007/JPEGImages/000369.jpg
../VOCdevkit/VOC2007/JPEGImages/000371.jpg
../VOCdevkit/VOC2007/JPEGImages/000375.jpg
(1, 21, 200, 5)
iter: 179
sort boxes. detection was 0.03782010078430176 and post took 0.00011396408081054688 and allboxappend took 0.006485462188720703
(1, 21, 200, 5)
iter: 180
sort boxes. detection was 0.011396408081054688 and post took 9.036064147949219e-05 and allboxappend took 0.006530284881591797
(1, 21, 200, 5)
iter: 181
sort boxes. detection was 0.02889108657836914 and post took 0.00010848045349121094 and allboxappend took 0.006547689437866211
(1, 21, 200, 5)
iter: 182
sort boxes. detection was 0.012468814849853516 and post took 9.393692016601562e-05 and allboxappend took 0.006578207015991211
(1, 21, 200, 5)
iter: 183
sort boxes. detection was 0.024688005447387695 and post took 9.417533874511719e-05 and allboxappend took 0.0

../VOCdevkit/VOC2007/JPEGImages/000449.jpg
../VOCdevkit/VOC2007/JPEGImages/000451.jpg
(1, 21, 200, 5)
iter: 220
sort boxes. detection was 0.014830827713012695 and post took 9.322166442871094e-05 and allboxappend took 0.006510257720947266
(1, 21, 200, 5)
iter: 221
sort boxes. detection was 0.011852264404296875 and post took 7.939338684082031e-05 and allboxappend took 0.006670236587524414
(1, 21, 200, 5)
iter: 222
sort boxes. detection was 0.020875930786132812 and post took 0.00010037422180175781 and allboxappend took 0.006428956985473633
(1, 21, 200, 5)
iter: 223
sort boxes. detection was 0.011578798294067383 and post took 7.009506225585938e-05 and allboxappend took 0.0064165592193603516
(1, 21, 200, 5)
iter: 224
sort boxes. detection was 0.011574506759643555 and post took 7.176399230957031e-05 and allboxappend took 0.0065686702728271484
(1, 21, 200, 5)
iter: 225
sort boxes. detection was 0.011333942413330078 and post took 6.771087646484375e-05 and allboxappend took 0.006506681442260742

../VOCdevkit/VOC2007/JPEGImages/000533.jpg
../VOCdevkit/VOC2007/JPEGImages/000534.jpg
../VOCdevkit/VOC2007/JPEGImages/000536.jpg
../VOCdevkit/VOC2007/JPEGImages/000538.jpg
../VOCdevkit/VOC2007/JPEGImages/000539.jpg
../VOCdevkit/VOC2007/JPEGImages/000542.jpg
(1, 21, 200, 5)
iter: 264
sort boxes. detection was 0.07171177864074707 and post took 0.000102996826171875 and allboxappend took 0.0067653656005859375
(1, 21, 200, 5)
iter: 265
sort boxes. detection was 0.03258657455444336 and post took 8.082389831542969e-05 and allboxappend took 0.006554365158081055
(1, 21, 200, 5)
iter: 266
sort boxes. detection was 0.01773357391357422 and post took 6.651878356933594e-05 and allboxappend took 0.006342649459838867
(1, 21, 200, 5)
iter: 267
sort boxes. detection was 0.014890670776367188 and post took 6.246566772460938e-05 and allboxappend took 0.0064046382904052734
(1, 21, 200, 5)
iter: 268
sort boxes. detection was 0.011548519134521484 and post took 5.984306335449219e-05 and allboxappend took 0.006

../VOCdevkit/VOC2007/JPEGImages/000615.jpg
../VOCdevkit/VOC2007/JPEGImages/000616.jpg
../VOCdevkit/VOC2007/JPEGImages/000617.jpg
../VOCdevkit/VOC2007/JPEGImages/000618.jpg
../VOCdevkit/VOC2007/JPEGImages/000621.jpg
../VOCdevkit/VOC2007/JPEGImages/000623.jpg
../VOCdevkit/VOC2007/JPEGImages/000624.jpg
(1, 21, 200, 5)
iter: 310
sort boxes. detection was 0.03705024719238281 and post took 7.796287536621094e-05 and allboxappend took 0.006549835205078125
(1, 21, 200, 5)
iter: 311
sort boxes. detection was 0.014284849166870117 and post took 6.532669067382812e-05 and allboxappend took 0.006327152252197266
(1, 21, 200, 5)
iter: 312
sort boxes. detection was 0.013298988342285156 and post took 6.580352783203125e-05 and allboxappend took 0.006486415863037109
(1, 21, 200, 5)
iter: 313
sort boxes. detection was 0.031028032302856445 and post took 0.0003612041473388672 and allboxappend took 0.006124019622802734
(1, 21, 200, 5)
iter: 314
sort boxes. detection was 0.01622772216796875 and post took 6.5803

../VOCdevkit/VOC2007/JPEGImages/000692.jpg
../VOCdevkit/VOC2007/JPEGImages/000693.jpg
../VOCdevkit/VOC2007/JPEGImages/000696.jpg
../VOCdevkit/VOC2007/JPEGImages/000697.jpg
(1, 21, 200, 5)
iter: 353
sort boxes. detection was 0.05312323570251465 and post took 0.0001354217529296875 and allboxappend took 0.006445646286010742
(1, 21, 200, 5)
iter: 354
sort boxes. detection was 0.06254839897155762 and post took 0.00010848045349121094 and allboxappend took 0.006802082061767578
(1, 21, 200, 5)
iter: 355
sort boxes. detection was 0.017705917358398438 and post took 0.0001010894775390625 and allboxappend took 0.006621360778808594
(1, 21, 200, 5)
iter: 356
sort boxes. detection was 0.0215299129486084 and post took 8.7738037109375e-05 and allboxappend took 0.006516695022583008
(1, 21, 200, 5)
../VOCdevkit/VOC2007/JPEGImages/000698.jpg
../VOCdevkit/VOC2007/JPEGImages/000701.jpg
../VOCdevkit/VOC2007/JPEGImages/000703.jpg
../VOCdevkit/VOC2007/JPEGImages/000704.jpg
iter: 357
sort boxes. detection was 0

../VOCdevkit/VOC2007/JPEGImages/000789.jpg
../VOCdevkit/VOC2007/JPEGImages/000790.jpg
../VOCdevkit/VOC2007/JPEGImages/000792.jpg
../VOCdevkit/VOC2007/JPEGImages/000795.jpg
../VOCdevkit/VOC2007/JPEGImages/000798.jpg
(1, 21, 200, 5)
iter: 401
sort boxes. detection was 0.08022761344909668 and post took 0.00010848045349121094 and allboxappend took 0.006832122802734375
(1, 21, 200, 5)
iter: 402
sort boxes. detection was 0.012238264083862305 and post took 8.726119995117188e-05 and allboxappend took 0.0063631534576416016
(1, 21, 200, 5)
iter: 403
sort boxes. detection was 0.04175972938537598 and post took 0.0003719329833984375 and allboxappend took 0.006293058395385742
(1, 21, 200, 5)
iter: 404
sort boxes. detection was 0.03560352325439453 and post took 9.655952453613281e-05 and allboxappend took 0.006590604782104492
(1, 21, 200, 5)
iter: 405
sort boxes. detection was 0.06467700004577637 and post took 0.00010275840759277344 and allboxappend took 0.006445646286010742
../VOCdevkit/VOC2007/JPEGI

../VOCdevkit/VOC2007/JPEGImages/000897.jpg
../VOCdevkit/VOC2007/JPEGImages/000901.jpg
../VOCdevkit/VOC2007/JPEGImages/000905.jpg
../VOCdevkit/VOC2007/JPEGImages/000907.jpg
../VOCdevkit/VOC2007/JPEGImages/000909.jpg
../VOCdevkit/VOC2007/JPEGImages/000910.jpg
(1, 21, 200, 5)
iter: 447
sort boxes. detection was 0.014582633972167969 and post took 7.62939453125e-05 and allboxappend took 0.006479978561401367
(1, 21, 200, 5)
iter: 448
sort boxes. detection was 0.023178577423095703 and post took 0.00011277198791503906 and allboxappend took 0.006649971008300781
(1, 21, 200, 5)
iter: 449
sort boxes. detection was 0.02820754051208496 and post took 0.00011801719665527344 and allboxappend took 0.006690263748168945
(1, 21, 200, 5)
iter: 450
sort boxes. detection was 0.013638734817504883 and post took 9.965896606445312e-05 and allboxappend took 0.006545066833496094
(1, 21, 200, 5)
iter: 451
sort boxes. detection was 0.0625143051147461 and post took 0.00010609626770019531 and allboxappend took 0.00662

../VOCdevkit/VOC2007/JPEGImages/000985.jpg
../VOCdevkit/VOC2007/JPEGImages/000986.jpg
../VOCdevkit/VOC2007/JPEGImages/000988.jpg
../VOCdevkit/VOC2007/JPEGImages/000990.jpg
(1, 21, 200, 5)
iter: 491
sort boxes. detection was 0.020795345306396484 and post took 7.414817810058594e-05 and allboxappend took 0.006451845169067383
(1, 21, 200, 5)
iter: 492
sort boxes. detection was 0.024569988250732422 and post took 0.0002846717834472656 and allboxappend took 0.006150722503662109
(1, 21, 200, 5)
iter: 493
sort boxes. detection was 0.0799875259399414 and post took 0.0003409385681152344 and allboxappend took 0.006469011306762695
(1, 21, 200, 5)
iter: 494
sort boxes. detection was 0.012357950210571289 and post took 6.270408630371094e-05 and allboxappend took 0.006345510482788086
../VOCdevkit/VOC2007/JPEGImages/000992.jpg
../VOCdevkit/VOC2007/JPEGImages/000994.jpg
../VOCdevkit/VOC2007/JPEGImages/000995.jpg
../VOCdevkit/VOC2007/JPEGImages/000998.jpg
../VOCdevkit/VOC2007/JPEGImages/001000.jpg
../VOCd

../VOCdevkit/VOC2007/JPEGImages/001059.jpg
../VOCdevkit/VOC2007/JPEGImages/001063.jpg
../VOCdevkit/VOC2007/JPEGImages/001065.jpg
../VOCdevkit/VOC2007/JPEGImages/001067.jpg
../VOCdevkit/VOC2007/JPEGImages/001070.jpg
../VOCdevkit/VOC2007/JPEGImages/001075.jpg
../VOCdevkit/VOC2007/JPEGImages/001076.jpg
(1, 21, 200, 5)
iter: 534
sort boxes. detection was 0.016510963439941406 and post took 8.0108642578125e-05 and allboxappend took 0.006514787673950195
(1, 21, 200, 5)
iter: 535
sort boxes. detection was 0.01275944709777832 and post took 7.534027099609375e-05 and allboxappend took 0.006446361541748047
(1, 21, 200, 5)
iter: 536
sort boxes. detection was 0.05555105209350586 and post took 0.00010895729064941406 and allboxappend took 0.006555795669555664
(1, 21, 200, 5)
iter: 537
sort boxes. detection was 0.02402663230895996 and post took 9.34600830078125e-05 and allboxappend took 0.006571292877197266
(1, 21, 200, 5)
iter: 538
sort boxes. detection was 0.022977352142333984 and post took 0.0001020

../VOCdevkit/VOC2007/JPEGImages/001153.jpg
../VOCdevkit/VOC2007/JPEGImages/001155.jpg
../VOCdevkit/VOC2007/JPEGImages/001157.jpg
../VOCdevkit/VOC2007/JPEGImages/001159.jpg
../VOCdevkit/VOC2007/JPEGImages/001162.jpg
../VOCdevkit/VOC2007/JPEGImages/001163.jpg
(1, 21, 200, 5)
iter: 578
sort boxes. detection was 0.056311845779418945 and post took 0.00010466575622558594 and allboxappend took 0.006577968597412109
(1, 21, 200, 5)
iter: 579
sort boxes. detection was 0.015486955642700195 and post took 0.0002346038818359375 and allboxappend took 0.006291389465332031
(1, 21, 200, 5)
iter: 580
sort boxes. detection was 0.01725149154663086 and post took 7.462501525878906e-05 and allboxappend took 0.006496429443359375
(1, 21, 200, 5)
iter: 581
sort boxes. detection was 0.019309043884277344 and post took 0.00011086463928222656 and allboxappend took 0.0064716339111328125
(1, 21, 200, 5)
iter: 582
sort boxes. detection was 0.04001355171203613 and post took 0.00010418891906738281 and allboxappend took 0

../VOCdevkit/VOC2007/JPEGImages/001249.jpg
../VOCdevkit/VOC2007/JPEGImages/001251.jpg
../VOCdevkit/VOC2007/JPEGImages/001252.jpg
../VOCdevkit/VOC2007/JPEGImages/001253.jpg
../VOCdevkit/VOC2007/JPEGImages/001255.jpg
(1, 21, 200, 5)
iter: 623
sort boxes. detection was 0.08701968193054199 and post took 8.821487426757812e-05 and allboxappend took 0.0066187381744384766
(1, 21, 200, 5)
iter: 624
sort boxes. detection was 0.05800294876098633 and post took 9.655952453613281e-05 and allboxappend took 0.0064775943756103516
(1, 21, 200, 5)
iter: 625
sort boxes. detection was 0.02901482582092285 and post took 8.249282836914062e-05 and allboxappend took 0.00648951530456543
(1, 21, 200, 5)
iter: 626
sort boxes. detection was 0.021556377410888672 and post took 8.630752563476562e-05 and allboxappend took 0.0064029693603515625
(1, 21, 200, 5)
iter: 627
sort boxes. detection was 0.03893876075744629 and post took 8.702278137207031e-05 and allboxappend took 0.006394147872924805
../VOCdevkit/VOC2007/JPEGIm

../VOCdevkit/VOC2007/JPEGImages/001342.jpg
iter: 664
sort boxes. detection was 0.029477357864379883 and post took 0.00010418891906738281 and allboxappend took 0.00700688362121582
(1, 21, 200, 5)
iter: 665
sort boxes. detection was 0.0332486629486084 and post took 9.775161743164062e-05 and allboxappend took 0.0066258907318115234
(1, 21, 200, 5)
iter: 666
sort boxes. detection was 0.02335357666015625 and post took 0.00010776519775390625 and allboxappend took 0.006580352783203125
(1, 21, 200, 5)
iter: 667
sort boxes. detection was 0.05137181282043457 and post took 6.437301635742188e-05 and allboxappend took 0.006487846374511719
(1, 21, 200, 5)
iter: 668
sort boxes. detection was 0.012771368026733398 and post took 5.984306335449219e-05 and allboxappend took 0.006268978118896484
(1, 21, 200, 5)
iter: 669
sort boxes. detection was 0.021608591079711914 and post took 0.0002856254577636719 and allboxappend took 0.006300210952758789
../VOCdevkit/VOC2007/JPEGImages/001344.jpg
../VOCdevkit/VOC2007

../VOCdevkit/VOC2007/JPEGImages/001412.jpg
../VOCdevkit/VOC2007/JPEGImages/001415.jpg
../VOCdevkit/VOC2007/JPEGImages/001416.jpg
../VOCdevkit/VOC2007/JPEGImages/001417.jpg
../VOCdevkit/VOC2007/JPEGImages/001419.jpg
../VOCdevkit/VOC2007/JPEGImages/001422.jpg
(1, 21, 200, 5)
iter: 708
sort boxes. detection was 0.08799362182617188 and post took 0.00010848045349121094 and allboxappend took 0.006711721420288086
(1, 21, 200, 5)
iter: 709
sort boxes. detection was 0.046221017837524414 and post took 9.775161743164062e-05 and allboxappend took 0.006603717803955078
(1, 21, 200, 5)
iter: 710
sort boxes. detection was 0.018473386764526367 and post took 9.5367431640625e-05 and allboxappend took 0.0065424442291259766
(1, 21, 200, 5)
iter: 711
sort boxes. detection was 0.017573118209838867 and post took 9.608268737792969e-05 and allboxappend took 0.006478786468505859
(1, 21, 200, 5)
iter: 712
sort boxes. detection was 0.032326459884643555 and post took 0.00010132789611816406 and allboxappend took 0.0

../VOCdevkit/VOC2007/JPEGImages/001507.jpg
(1, 21, 200, 5)
iter: 749
sort boxes. detection was 0.015946149826049805 and post took 7.081031799316406e-05 and allboxappend took 0.006474018096923828
(1, 21, 200, 5)
iter: 750
sort boxes. detection was 0.01905083656311035 and post took 7.033348083496094e-05 and allboxappend took 0.0064775943756103516
(1, 21, 200, 5)
iter: 751
sort boxes. detection was 0.02444148063659668 and post took 7.2479248046875e-05 and allboxappend took 0.0065762996673583984
(1, 21, 200, 5)
iter: 752
sort boxes. detection was 0.03155970573425293 and post took 0.00011014938354492188 and allboxappend took 0.006681203842163086
(1, 21, 200, 5)
iter: 753
sort boxes. detection was 0.015311241149902344 and post took 0.00012087821960449219 and allboxappend took 0.006505012512207031
(1, 21, 200, 5)
iter: 754
sort boxes. detection was 0.04815506935119629 and post took 0.00010275840759277344 and allboxappend took 0.006437540054321289
../VOCdevkit/VOC2007/JPEGImages/001508.jpg
../

../VOCdevkit/VOC2007/JPEGImages/001589.jpg
../VOCdevkit/VOC2007/JPEGImages/001591.jpg
../VOCdevkit/VOC2007/JPEGImages/001592.jpg
../VOCdevkit/VOC2007/JPEGImages/001596.jpg
../VOCdevkit/VOC2007/JPEGImages/001599.jpg
../VOCdevkit/VOC2007/JPEGImages/001600.jpg
../VOCdevkit/VOC2007/JPEGImages/001601.jpg
(1, 21, 200, 5)
iter: 796
sort boxes. detection was 0.030942440032958984 and post took 0.00047850608825683594 and allboxappend took 0.006272554397583008
(1, 21, 200, 5)
iter: 797
sort boxes. detection was 0.04551053047180176 and post took 9.703636169433594e-05 and allboxappend took 0.00665736198425293
(1, 21, 200, 5)
iter: 798
sort boxes. detection was 0.019253015518188477 and post took 7.796287536621094e-05 and allboxappend took 0.0066373348236083984
(1, 21, 200, 5)
iter: 799
sort boxes. detection was 0.01703953742980957 and post took 7.200241088867188e-05 and allboxappend took 0.006524324417114258
(1, 21, 200, 5)
iter: 800
sort boxes. detection was 0.011152505874633789 and post took 6.747

../VOCdevkit/VOC2007/JPEGImages/001670.jpg
../VOCdevkit/VOC2007/JPEGImages/001671.jpg
../VOCdevkit/VOC2007/JPEGImages/001672.jpg
../VOCdevkit/VOC2007/JPEGImages/001674.jpg
../VOCdevkit/VOC2007/JPEGImages/001679.jpg
../VOCdevkit/VOC2007/JPEGImages/001681.jpg
(1, 21, 200, 5)
iter: 841
sort boxes. detection was 0.07709217071533203 and post took 8.320808410644531e-05 and allboxappend took 0.00659632682800293
(1, 21, 200, 5)
iter: 842
sort boxes. detection was 0.014655828475952148 and post took 6.556510925292969e-05 and allboxappend took 0.006485700607299805
(1, 21, 200, 5)
iter: 843
sort boxes. detection was 0.03482508659362793 and post took 0.0003428459167480469 and allboxappend took 0.00618290901184082
(1, 21, 200, 5)
iter: 844
sort boxes. detection was 0.028914213180541992 and post took 7.677078247070312e-05 and allboxappend took 0.006605625152587891
(1, 21, 200, 5)
iter: 845
sort boxes. detection was 0.022485733032226562 and post took 7.843971252441406e-05 and allboxappend took 0.00643

../VOCdevkit/VOC2007/JPEGImages/001767.jpg
../VOCdevkit/VOC2007/JPEGImages/001769.jpg
../VOCdevkit/VOC2007/JPEGImages/001770.jpg
../VOCdevkit/VOC2007/JPEGImages/001773.jpg
../VOCdevkit/VOC2007/JPEGImages/001774.jpg
iter: 887
sort boxes. detection was 0.014487504959106445 and post took 6.437301635742188e-05 and allboxappend took 0.0068013668060302734
(1, 21, 200, 5)
iter: 888
sort boxes. detection was 0.01804828643798828 and post took 7.104873657226562e-05 and allboxappend took 0.006533622741699219
(1, 21, 200, 5)
iter: 889
sort boxes. detection was 0.037831783294677734 and post took 8.749961853027344e-05 and allboxappend took 0.0065462589263916016
(1, 21, 200, 5)
iter: 890
sort boxes. detection was 0.013755559921264648 and post took 6.389617919921875e-05 and allboxappend took 0.006354570388793945
(1, 21, 200, 5)
iter: 891
sort boxes. detection was 0.029729604721069336 and post took 7.867813110351562e-05 and allboxappend took 0.006543159484863281
../VOCdevkit/VOC2007/JPEGImages/001776.j

../VOCdevkit/VOC2007/JPEGImages/001852.jpg
../VOCdevkit/VOC2007/JPEGImages/001856.jpg
../VOCdevkit/VOC2007/JPEGImages/001857.jpg
../VOCdevkit/VOC2007/JPEGImages/001859.jpg
../VOCdevkit/VOC2007/JPEGImages/001863.jpg
../VOCdevkit/VOC2007/JPEGImages/001865.jpg
iter: 931
sort boxes. detection was 0.011206865310668945 and post took 8.320808410644531e-05 and allboxappend took 0.006439208984375
(1, 21, 200, 5)
iter: 932
sort boxes. detection was 0.02873539924621582 and post took 6.151199340820312e-05 and allboxappend took 0.006463289260864258
(1, 21, 200, 5)
iter: 933
sort boxes. detection was 0.01842474937438965 and post took 6.341934204101562e-05 and allboxappend took 0.006371498107910156
(1, 21, 200, 5)
iter: 934
sort boxes. detection was 0.015439033508300781 and post took 6.103515625e-05 and allboxappend took 0.006509304046630859
(1, 21, 200, 5)
iter: 935
sort boxes. detection was 0.05867910385131836 and post took 9.775161743164062e-05 and allboxappend took 0.006616830825805664
(1, 21, 20

../VOCdevkit/VOC2007/JPEGImages/001946.jpg
(1, 21, 200, 5)
iter: 972
sort boxes. detection was 0.07898807525634766 and post took 0.00010991096496582031 and allboxappend took 0.006757259368896484
(1, 21, 200, 5)
iter: 973
sort boxes. detection was 0.019280672073364258 and post took 0.00011277198791503906 and allboxappend took 0.0065822601318359375
(1, 21, 200, 5)
iter: 974
sort boxes. detection was 0.03968071937561035 and post took 8.630752563476562e-05 and allboxappend took 0.0064470767974853516
(1, 21, 200, 5)
iter: 975
sort boxes. detection was 0.015851497650146484 and post took 6.103515625e-05 and allboxappend took 0.006366729736328125
(1, 21, 200, 5)
iter: 976
sort boxes. detection was 0.012960433959960938 and post took 7.939338684082031e-05 and allboxappend took 0.006313323974609375
(1, 21, 200, 5)
iter: 977
sort boxes. detection was 0.03258681297302246 and post took 0.00010633468627929688 and allboxappend took 0.00665593147277832
(1, 21, 200, 5)
iter: 978
sort boxes. detection wa

../VOCdevkit/VOC2007/JPEGImages/002026.jpg
../VOCdevkit/VOC2007/JPEGImages/002028.jpg
../VOCdevkit/VOC2007/JPEGImages/002029.jpg
../VOCdevkit/VOC2007/JPEGImages/002031.jpg
../VOCdevkit/VOC2007/JPEGImages/002032.jpg
(1, 21, 200, 5)
iter: 1021
sort boxes. detection was 0.020052194595336914 and post took 7.343292236328125e-05 and allboxappend took 0.00651097297668457
(1, 21, 200, 5)
iter: 1022
sort boxes. detection was 0.010956287384033203 and post took 6.389617919921875e-05 and allboxappend took 0.00633549690246582
(1, 21, 200, 5)
iter: 1023
sort boxes. detection was 0.03186202049255371 and post took 8.177757263183594e-05 and allboxappend took 0.0064847469329833984
(1, 21, 200, 5)
iter: 1024
sort boxes. detection was 0.04960751533508301 and post took 9.489059448242188e-05 and allboxappend took 0.00659632682800293
(1, 21, 200, 5)
iter: 1025
sort boxes. detection was 0.04669070243835449 and post took 0.0003452301025390625 and allboxappend took 0.006383419036865234
(1, 21, 200, 5)
iter: 102

../VOCdevkit/VOC2007/JPEGImages/002106.jpg
../VOCdevkit/VOC2007/JPEGImages/002107.jpg
../VOCdevkit/VOC2007/JPEGImages/002110.jpg
../VOCdevkit/VOC2007/JPEGImages/002111.jpg
../VOCdevkit/VOC2007/JPEGImages/002113.jpg
../VOCdevkit/VOC2007/JPEGImages/002115.jpg
iter: 1064
sort boxes. detection was 0.06636476516723633 and post took 0.00010061264038085938 and allboxappend took 0.006907463073730469
(1, 21, 200, 5)
iter: 1065
sort boxes. detection was 0.012438774108886719 and post took 9.894371032714844e-05 and allboxappend took 0.006489992141723633
(1, 21, 200, 5)
iter: 1066
sort boxes. detection was 0.014299392700195312 and post took 9.202957153320312e-05 and allboxappend took 0.006594181060791016
(1, 21, 200, 5)
iter: 1067
sort boxes. detection was 0.011031627655029297 and post took 6.818771362304688e-05 and allboxappend took 0.006444692611694336
(1, 21, 200, 5)
iter: 1068
sort boxes. detection was 0.0695803165435791 and post took 0.0003483295440673828 and allboxappend took 0.00629878044128

../VOCdevkit/VOC2007/JPEGImages/002206.jpg
../VOCdevkit/VOC2007/JPEGImages/002207.jpg
(1, 21, 200, 5)
iter: 1105
sort boxes. detection was 0.02387213706970215 and post took 8.082389831542969e-05 and allboxappend took 0.006518125534057617
(1, 21, 200, 5)
iter: 1106
sort boxes. detection was 0.0471959114074707 and post took 0.00010824203491210938 and allboxappend took 0.0065765380859375
(1, 21, 200, 5)
iter: 1107
sort boxes. detection was 0.025117158889770508 and post took 0.00010752677917480469 and allboxappend took 0.00670623779296875
(1, 21, 200, 5)
iter: 1108
sort boxes. detection was 0.01885509490966797 and post took 8.153915405273438e-05 and allboxappend took 0.0065882205963134766
(1, 21, 200, 5)
iter: 1109
sort boxes. detection was 0.01118612289428711 and post took 7.033348083496094e-05 and allboxappend took 0.0063304901123046875
(1, 21, 200, 5)
iter: 1110
sort boxes. detection was 0.016000747680664062 and post took 8.58306884765625e-05 and allboxappend took 0.0064852237701416016


../VOCdevkit/VOC2007/JPEGImages/002298.jpg
../VOCdevkit/VOC2007/JPEGImages/002299.jpg
../VOCdevkit/VOC2007/JPEGImages/002301.jpg
../VOCdevkit/VOC2007/JPEGImages/002303.jpg
../VOCdevkit/VOC2007/JPEGImages/002304.jpg
(1, 21, 200, 5)
iter: 1151
sort boxes. detection was 0.015606403350830078 and post took 7.295608520507812e-05 and allboxappend took 0.006472349166870117
(1, 21, 200, 5)
iter: 1152
sort boxes. detection was 0.054108381271362305 and post took 9.655952453613281e-05 and allboxappend took 0.0063474178314208984
(1, 21, 200, 5)
iter: 1153
sort boxes. detection was 0.03822135925292969 and post took 8.869171142578125e-05 and allboxappend took 0.006555080413818359
(1, 21, 200, 5)
iter: 1154
sort boxes. detection was 0.025252342224121094 and post took 7.772445678710938e-05 and allboxappend took 0.006422996520996094
(1, 21, 200, 5)
iter: 1155
sort boxes. detection was 0.01235342025756836 and post took 6.103515625e-05 and allboxappend took 0.0065686702728271484
../VOCdevkit/VOC2007/JPEGI

../VOCdevkit/VOC2007/JPEGImages/002396.jpg
../VOCdevkit/VOC2007/JPEGImages/002397.jpg
../VOCdevkit/VOC2007/JPEGImages/002398.jpg
(1, 21, 200, 5)
iter: 1194
sort boxes. detection was 0.04035806655883789 and post took 9.083747863769531e-05 and allboxappend took 0.006592988967895508
(1, 21, 200, 5)
iter: 1195
sort boxes. detection was 0.06378626823425293 and post took 0.00013637542724609375 and allboxappend took 0.013541936874389648
(1, 21, 200, 5)
iter: 1196
sort boxes. detection was 0.09248733520507812 and post took 0.00010347366333007812 and allboxappend took 0.006443500518798828
../VOCdevkit/VOC2007/JPEGImages/002399.jpg
../VOCdevkit/VOC2007/JPEGImages/002400.jpg
../VOCdevkit/VOC2007/JPEGImages/002402.jpg
../VOCdevkit/VOC2007/JPEGImages/002406.jpg
../VOCdevkit/VOC2007/JPEGImages/002408.jpg
../VOCdevkit/VOC2007/JPEGImages/002409.jpg
../VOCdevkit/VOC2007/JPEGImages/002412.jpg
(1, 21, 200, 5)
iter: 1197
sort boxes. detection was 0.016492128372192383 and post took 7.224082946777344e-05 an

../VOCdevkit/VOC2007/JPEGImages/002487.jpg
../VOCdevkit/VOC2007/JPEGImages/002488.jpg
../VOCdevkit/VOC2007/JPEGImages/002489.jpg
../VOCdevkit/VOC2007/JPEGImages/002495.jpg
../VOCdevkit/VOC2007/JPEGImages/002498.jpg
../VOCdevkit/VOC2007/JPEGImages/002499.jpg
../VOCdevkit/VOC2007/JPEGImages/002503.jpg
(1, 21, 200, 5)
iter: 1237
sort boxes. detection was 0.07157373428344727 and post took 0.00010085105895996094 and allboxappend took 0.00671696662902832
(1, 21, 200, 5)
iter: 1238
sort boxes. detection was 0.015313148498535156 and post took 6.389617919921875e-05 and allboxappend took 0.0064449310302734375
(1, 21, 200, 5)
iter: 1239
sort boxes. detection was 0.03139781951904297 and post took 8.20159912109375e-05 and allboxappend took 0.00660395622253418
(1, 21, 200, 5)
iter: 1240
sort boxes. detection was 0.022011280059814453 and post took 7.271766662597656e-05 and allboxappend took 0.006395101547241211
(1, 21, 200, 5)
iter: 1241
sort boxes. detection was 0.013668060302734375 and post took 7.

../VOCdevkit/VOC2007/JPEGImages/002580.jpg
../VOCdevkit/VOC2007/JPEGImages/002581.jpg
../VOCdevkit/VOC2007/JPEGImages/002582.jpg
../VOCdevkit/VOC2007/JPEGImages/002583.jpg
../VOCdevkit/VOC2007/JPEGImages/002587.jpg
(1, 21, 200, 5)
iter: 1281
sort boxes. detection was 0.11306357383728027 and post took 0.00010228157043457031 and allboxappend took 0.006589412689208984
(1, 21, 200, 5)
iter: 1282
sort boxes. detection was 0.011639118194580078 and post took 6.127357482910156e-05 and allboxappend took 0.006453752517700195
(1, 21, 200, 5)
iter: 1283
sort boxes. detection was 0.01265263557434082 and post took 5.745887756347656e-05 and allboxappend took 0.006421566009521484
(1, 21, 200, 5)
iter: 1284
sort boxes. detection was 0.038779258728027344 and post took 8.082389831542969e-05 and allboxappend took 0.00656580924987793
(1, 21, 200, 5)
iter: 1285
sort boxes. detection was 0.08388042449951172 and post took 0.00010061264038085938 and allboxappend took 0.006540060043334961
../VOCdevkit/VOC2007/J

../VOCdevkit/VOC2007/JPEGImages/002665.jpg
../VOCdevkit/VOC2007/JPEGImages/002671.jpg
../VOCdevkit/VOC2007/JPEGImages/002672.jpg
../VOCdevkit/VOC2007/JPEGImages/002673.jpg
../VOCdevkit/VOC2007/JPEGImages/002674.jpg
../VOCdevkit/VOC2007/JPEGImages/002676.jpg
(1, 21, 200, 5)
iter: 1325
sort boxes. detection was 0.02511882781982422 and post took 0.00010585784912109375 and allboxappend took 0.006626129150390625
(1, 21, 200, 5)
iter: 1326
sort boxes. detection was 0.01136016845703125 and post took 9.965896606445312e-05 and allboxappend took 0.00633692741394043
(1, 21, 200, 5)
iter: 1327
sort boxes. detection was 0.012787818908691406 and post took 8.344650268554688e-05 and allboxappend took 0.006600618362426758
(1, 21, 200, 5)
iter: 1328
sort boxes. detection was 0.05507636070251465 and post took 0.00039839744567871094 and allboxappend took 0.00635218620300293
(1, 21, 200, 5)
iter: 1329
sort boxes. detection was 0.02581191062927246 and post took 5.793571472167969e-05 and allboxappend took 0.

(1, 21, 200, 5)
iter: 1366
sort boxes. detection was 0.016369342803955078 and post took 8.893013000488281e-05 and allboxappend took 0.0066106319427490234
(1, 21, 200, 5)
iter: 1367
sort boxes. detection was 0.019387006759643555 and post took 8.20159912109375e-05 and allboxappend took 0.006516933441162109
(1, 21, 200, 5)
iter: 1368
sort boxes. detection was 0.04964447021484375 and post took 0.00010442733764648438 and allboxappend took 0.006661415100097656
(1, 21, 200, 5)
iter: 1369
sort boxes. detection was 0.050514936447143555 and post took 0.00010156631469726562 and allboxappend took 0.0065114498138427734
(1, 21, 200, 5)
iter: 1370
sort boxes. detection was 0.03017449378967285 and post took 0.00010776519775390625 and allboxappend took 0.006499767303466797
../VOCdevkit/VOC2007/JPEGImages/002761.jpg
../VOCdevkit/VOC2007/JPEGImages/002764.jpg
(1, 21, 200, 5)
iter: 1371
sort boxes. detection was 0.012563943862915039 and post took 8.511543273925781e-05 and allboxappend took 0.0064949989318

../VOCdevkit/VOC2007/JPEGImages/002846.jpg
../VOCdevkit/VOC2007/JPEGImages/002849.jpg
../VOCdevkit/VOC2007/JPEGImages/002850.jpg
../VOCdevkit/VOC2007/JPEGImages/002851.jpg
../VOCdevkit/VOC2007/JPEGImages/002852.jpg
../VOCdevkit/VOC2007/JPEGImages/002853.jpg
../VOCdevkit/VOC2007/JPEGImages/002856.jpg
../VOCdevkit/VOC2007/JPEGImages/002857.jpg
(1, 21, 200, 5)
iter: 1413
sort boxes. detection was 0.015439510345458984 and post took 9.036064147949219e-05 and allboxappend took 0.006531476974487305
(1, 21, 200, 5)
iter: 1414
sort boxes. detection was 0.01095724105834961 and post took 6.723403930664062e-05 and allboxappend took 0.006430625915527344
(1, 21, 200, 5)
iter: 1415
sort boxes. detection was 0.01767754554748535 and post took 0.00011849403381347656 and allboxappend took 0.0065364837646484375
(1, 21, 200, 5)
iter: 1416
sort boxes. detection was 0.02292799949645996 and post took 0.0003666877746582031 and allboxappend took 0.006407260894775391
(1, 21, 200, 5)
iter: 1417
sort boxes. detect

../VOCdevkit/VOC2007/JPEGImages/002923.jpg
../VOCdevkit/VOC2007/JPEGImages/002925.jpg
../VOCdevkit/VOC2007/JPEGImages/002926.jpg
../VOCdevkit/VOC2007/JPEGImages/002927.jpg
../VOCdevkit/VOC2007/JPEGImages/002928.jpg
../VOCdevkit/VOC2007/JPEGImages/002929.jpg
(1, 21, 200, 5)
iter: 1456
sort boxes. detection was 0.05770754814147949 and post took 0.00012731552124023438 and allboxappend took 0.006867885589599609
(1, 21, 200, 5)
iter: 1457
sort boxes. detection was 0.012816190719604492 and post took 7.128715515136719e-05 and allboxappend took 0.0065343379974365234
(1, 21, 200, 5)
iter: 1458
sort boxes. detection was 0.05341458320617676 and post took 0.00010800361633300781 and allboxappend took 0.0066716670989990234
(1, 21, 200, 5)
iter: 1459
sort boxes. detection was 0.011532783508300781 and post took 9.393692016601562e-05 and allboxappend took 0.0066111087799072266
(1, 21, 200, 5)
iter: 1460
sort boxes. detection was 0.04664111137390137 and post took 0.000102996826171875 and allboxappend to

../VOCdevkit/VOC2007/JPEGImages/003022.jpg
../VOCdevkit/VOC2007/JPEGImages/003025.jpg
../VOCdevkit/VOC2007/JPEGImages/003026.jpg
../VOCdevkit/VOC2007/JPEGImages/003029.jpg
../VOCdevkit/VOC2007/JPEGImages/003030.jpg
iter: 1500
sort boxes. detection was 0.01826763153076172 and post took 0.00010204315185546875 and allboxappend took 0.006926298141479492
(1, 21, 200, 5)
iter: 1501
sort boxes. detection was 0.01159048080444336 and post took 0.00010251998901367188 and allboxappend took 0.006455183029174805
(1, 21, 200, 5)
iter: 1502
sort boxes. detection was 0.07335782051086426 and post took 0.00010824203491210938 and allboxappend took 0.006569385528564453
(1, 21, 200, 5)
iter: 1503
sort boxes. detection was 0.012232780456542969 and post took 9.012222290039062e-05 and allboxappend took 0.006552219390869141
(1, 21, 200, 5)
iter: 1504
sort boxes. detection was 0.0199429988861084 and post took 0.0003135204315185547 and allboxappend took 0.006284952163696289
../VOCdevkit/VOC2007/JPEGImages/003033

../VOCdevkit/VOC2007/JPEGImages/003114.jpg
../VOCdevkit/VOC2007/JPEGImages/003115.jpg
../VOCdevkit/VOC2007/JPEGImages/003119.jpg
../VOCdevkit/VOC2007/JPEGImages/003123.jpg
../VOCdevkit/VOC2007/JPEGImages/003125.jpg
../VOCdevkit/VOC2007/JPEGImages/003128.jpg
iter: 1544
sort boxes. detection was 0.01353597640991211 and post took 0.00025653839111328125 and allboxappend took 0.006628513336181641
(1, 21, 200, 5)
iter: 1545
sort boxes. detection was 0.03149533271789551 and post took 8.96453857421875e-05 and allboxappend took 0.006571531295776367
(1, 21, 200, 5)
iter: 1546
sort boxes. detection was 0.02427196502685547 and post took 8.344650268554688e-05 and allboxappend took 0.006648540496826172
(1, 21, 200, 5)
iter: 1547
sort boxes. detection was 0.020920515060424805 and post took 7.82012939453125e-05 and allboxappend took 0.006506204605102539
(1, 21, 200, 5)
iter: 1548
sort boxes. detection was 0.040410518646240234 and post took 0.00010895729064941406 and allboxappend took 0.006383180618286

../VOCdevkit/VOC2007/JPEGImages/003217.jpg
../VOCdevkit/VOC2007/JPEGImages/003220.jpg
(1, 21, 200, 5)
iter: 1585
sort boxes. detection was 0.01904153823852539 and post took 9.250640869140625e-05 and allboxappend took 0.006720542907714844
(1, 21, 200, 5)
iter: 1586
sort boxes. detection was 0.021540164947509766 and post took 8.58306884765625e-05 and allboxappend took 0.006563425064086914
(1, 21, 200, 5)
iter: 1587
sort boxes. detection was 0.012614011764526367 and post took 8.988380432128906e-05 and allboxappend took 0.0064373016357421875
(1, 21, 200, 5)
iter: 1588
sort boxes. detection was 0.03172016143798828 and post took 6.914138793945312e-05 and allboxappend took 0.006497383117675781
(1, 21, 200, 5)
iter: 1589
sort boxes. detection was 0.015455484390258789 and post took 5.7220458984375e-05 and allboxappend took 0.006368398666381836
(1, 21, 200, 5)
iter: 1590
sort boxes. detection was 0.012717485427856445 and post took 5.8650970458984375e-05 and allboxappend took 0.006450414657592773

../VOCdevkit/VOC2007/JPEGImages/003295.jpg
../VOCdevkit/VOC2007/JPEGImages/003297.jpg
../VOCdevkit/VOC2007/JPEGImages/003298.jpg
../VOCdevkit/VOC2007/JPEGImages/003302.jpg
../VOCdevkit/VOC2007/JPEGImages/003304.jpg
../VOCdevkit/VOC2007/JPEGImages/003305.jpg
(1, 21, 200, 5)
iter: 1629
sort boxes. detection was 0.05574941635131836 and post took 0.00011372566223144531 and allboxappend took 0.006719350814819336
(1, 21, 200, 5)
iter: 1630
sort boxes. detection was 0.06299972534179688 and post took 0.00010132789611816406 and allboxappend took 0.006571769714355469
(1, 21, 200, 5)
iter: 1631
sort boxes. detection was 0.018105268478393555 and post took 7.343292236328125e-05 and allboxappend took 0.006509304046630859
(1, 21, 200, 5)
iter: 1632
sort boxes. detection was 0.022095441818237305 and post took 0.00010132789611816406 and allboxappend took 0.006525993347167969
(1, 21, 200, 5)
iter: 1633
sort boxes. detection was 0.02922821044921875 and post took 6.985664367675781e-05 and allboxappend too

../VOCdevkit/VOC2007/JPEGImages/003375.jpg
../VOCdevkit/VOC2007/JPEGImages/003378.jpg
../VOCdevkit/VOC2007/JPEGImages/003381.jpg
../VOCdevkit/VOC2007/JPEGImages/003383.jpg
../VOCdevkit/VOC2007/JPEGImages/003384.jpg
../VOCdevkit/VOC2007/JPEGImages/003385.jpg
../VOCdevkit/VOC2007/JPEGImages/003387.jpg
../VOCdevkit/VOC2007/JPEGImages/003388.jpg
(1, 21, 200, 5)
iter: 1672
sort boxes. detection was 0.020594120025634766 and post took 8.845329284667969e-05 and allboxappend took 0.006547451019287109
(1, 21, 200, 5)
iter: 1673
sort boxes. detection was 0.0223236083984375 and post took 8.249282836914062e-05 and allboxappend took 0.00636744499206543
(1, 21, 200, 5)
iter: 1674
sort boxes. detection was 0.016191720962524414 and post took 7.772445678710938e-05 and allboxappend took 0.0065326690673828125
(1, 21, 200, 5)
iter: 1675
sort boxes. detection was 0.025520801544189453 and post took 8.96453857421875e-05 and allboxappend took 0.0065195560455322266
(1, 21, 200, 5)
iter: 1676
sort boxes. detecti

In [None]:
# Peek at the stored detections for class index 7 ('car' in pascal_classes)
# on the first 10 images; all_boxes is indexed as all_boxes[class][image].
all_boxes[7][0:10]

# Evaluate detection accuracy (PASCAL VOC AP / mAP)

In [None]:
# eval function
def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):
  """
  rec, prec, ap = voc_eval(detpath,
                              annopath,
                              imagesetfile,
                              classname,
                              cachedir,
                              [ovthresh],
                              [use_07_metric])
  Top level function that does the PASCAL VOC evaluation for one class.

  detpath: Path to detections
      detpath.format(classname) should produce the detection results file.
      Each line is "<image> <confidence> <xmin> <ymin> <xmax> <ymax>",
      separated by single spaces. <image> may be a bare image name or a
      path to the image file; its directory and extension are stripped
      before it is matched against the names in imagesetfile.
  annopath: Path to annotations
      annopath.format(imagename) should be the xml annotations file.
  imagesetfile: Text file containing the list of images, one image per line.
  classname: Category name
  cachedir: Directory for caching the annotations (created if missing)
  [ovthresh]: Overlap threshold (default = 0.5)
  [use_07_metric]: Whether to use VOC07's 11 point AP computation
      (default False)

  Returns (rec, prec, ap): the cumulative recall and precision arrays over
  the detections sorted by descending confidence, and the average precision
  that voc_ap computes from them.
  """
  # assumes detections are in detpath.format(classname)
  # assumes annotations are in annopath.format(imagename)
  # assumes imagesetfile is a text file with each line an image name
  # cachedir caches the annotations in a pickle file

  # first load gt
  os.makedirs(cachedir, exist_ok=True)
  # NOTE(review): imagesetfile is interpolated verbatim, so when it is a path
  # the cache file name contains directory separators - confirm this is the
  # intended cache location before re-enabling the save below.
  cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile)
  # read list of images (blank lines are ignored)
  with open(imagesetfile, 'r') as f:
    imagenames = [x.strip() for x in f if x.strip()]

  if not os.path.isfile(cachefile):
    # load annotations
    recs = {}
    for i, imagename in enumerate(imagenames):
      recs[imagename] = parse_rec(annopath.format(imagename))
      if i % 100 == 0:
        print('Reading annotation for {:d}/{:d}'.format(
          i + 1, len(imagenames)))
    # save (currently disabled: annotations are re-parsed on every call)
    #print('Saving cached annotations to {:s}'.format(cachefile))
    #with open(cachefile, 'wb') as f:
    #  pickle.dump(recs, f)
  else:
    # load
    # NOTE(security): unpickling can execute arbitrary code; only point
    # cachedir at cache files you created yourself.
    with open(cachefile, 'rb') as f:
      try:
        recs = pickle.load(f)
      except Exception:
        # Fallback for caches with non-ASCII byte strings (e.g. written by
        # Python 2). Rewind first: the failed attempt has already consumed
        # part of the stream.
        f.seek(0)
        recs = pickle.load(f, encoding='bytes')

  # extract gt objects for this class
  class_recs = {}
  npos = 0
  for imagename in imagenames:
    R = [obj for obj in recs[imagename] if obj['name'] == classname]
    bbox = np.array([x['bbox'] for x in R])
    # np.bool was removed in NumPy 1.24; the builtin bool is equivalent here
    difficult = np.array([x['difficult'] for x in R]).astype(bool)
    det = [False] * len(R)
    # "difficult" objects do not count towards the number of positives
    npos = npos + int(np.sum(~difficult))
    class_recs[imagename] = {'bbox': bbox,
                             'difficult': difficult,
                             'det': det}

  # read dets
  detfile = detpath.format(classname)
  with open(detfile, 'r') as f:
    lines = f.readlines()

  # blank lines are skipped so they cannot break the field parsing below
  splitlines = [x.strip().split(' ') for x in lines if x.strip()]
  image_ids = [x[0] for x in splitlines]
  confidence = np.array([float(x[1]) for x in splitlines])
  BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

  nd = len(image_ids)
  tp = np.zeros(nd)
  fp = np.zeros(nd)

  if BB.shape[0] > 0:
    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    for d in range(nd):
      # Reduce ".../JPEGImages/000001.jpg" (or a bare "000001") to the image
      # name used as key in class_recs.
      image_id = os.path.splitext(os.path.basename(image_ids[d]))[0]
      # catch bad detections: an image that is not part of the image set is
      # reported and counted as neither TP nor FP
      R = class_recs.get(image_id)
      if R is None:
        print("det not found")
        continue

      bb = BB[d, :].astype(float)
      ovmax = -np.inf
      BBGT = R['bbox'].astype(float)

      if BBGT.size > 0:
        # compute overlaps
        # intersection (the +1 treats coordinates as inclusive pixel indices)
        ixmin = np.maximum(BBGT[:, 0], bb[0])
        iymin = np.maximum(BBGT[:, 1], bb[1])
        ixmax = np.minimum(BBGT[:, 2], bb[2])
        iymax = np.minimum(BBGT[:, 3], bb[3])
        iw = np.maximum(ixmax - ixmin + 1., 0.)
        ih = np.maximum(iymax - iymin + 1., 0.)
        inters = iw * ih

        # union
        uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
               (BBGT[:, 2] - BBGT[:, 0] + 1.) *
               (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

        overlaps = inters / uni
        ovmax = np.max(overlaps)
        jmax = np.argmax(overlaps)

      if ovmax > ovthresh:
        # a match with a "difficult" object is ignored (neither TP nor FP)
        if not R['difficult'][jmax]:
          if not R['det'][jmax]:
            tp[d] = 1.
            R['det'][jmax] = 1
          else:
            # the ground-truth box was already claimed by a higher-scoring
            # detection, so this one is a duplicate
            fp[d] = 1.
      else:
        fp[d] = 1.

  # compute precision recall
  fp = np.cumsum(fp)
  tp = np.cumsum(tp)
  rec = tp / float(npos)
  # avoid divide by zero in case the first detection matches a difficult
  # ground truth
  prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
  ap = voc_ap(rec, prec, use_07_metric)

  return rec, prec, ap

In [None]:
# Class names in label order. Index 0 is the background entry, which the
# results writer and the evaluation loop both skip.
PASCAL_CLASSES = np.asarray([
    "__background__",
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
])

# Lower-case alias of the same array object.
pascal_classes = PASCAL_CLASSES

# write out detections for evaluation

In [None]:
import os 
def get_voc_results_file_template(cls):
    """Return the detection results file path for class ``cls``.

    The path is <DEVKIT_PATH>/results/VOC2007/Main/det_val_<cls>.txt.
    The containing directory is created if it does not exist yet, so the
    returned path can be opened for writing right away.

    cls: class name (string) used in the file name.
    """
    filedir = os.path.join(DEVKIT_PATH, 'results', 'VOC2007', 'Main')
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(filedir, exist_ok=True)
    filename = 'det_' + "val" + '_' + cls + '.txt'
    return os.path.join(filedir, filename)


def write_voc_results_file(pascal_classes, all_boxes, image_index):
    """Write one detection results text file per non-background class.

    pascal_classes: sequence of class names; entries equal to
        '__background__' are skipped.
    all_boxes: detections indexed as all_boxes[class_index][image_index].
        Each entry is converted with np.asarray and read as rows of
        [confidence, xmin, ymin, xmax, ymax]; an empty entry means no
        detections for that class in that image.
    image_index: sequence of image identifiers (strings), in the same order
        as the image axis of all_boxes. Each identifier is written verbatim
        as the first field of every line.

    Each output line is "<image> <confidence> <xmin> <ymin> <xmax> <ymax>";
    the target path comes from get_voc_results_file_template(cls).
    """
    for cls_ind, cls in enumerate(pascal_classes):
        if cls == '__background__':
            continue
        print('Writing {} VOC results file'.format(cls))
        filename = get_voc_results_file_template(cls)
        with open(filename, 'wt') as f:
            for im_ind, index in enumerate(image_index):
                dets = np.asarray(all_boxes[cls_ind][im_ind])
                # Test emptiness through .size: `dets == []` is an
                # elementwise comparison, not an emptiness check, and fails
                # on a shape mismatch with recent NumPy versions.
                if dets.size == 0:
                    continue
                # the VOCdevkit expects 1-based indices
                for k in range(dets.shape[0]):
                    f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                            format(index, dets[k, 0],
                                   dets[k, 1] + 1, dets[k, 2] + 1,
                                   dets[k, 3] + 1, dets[k, 4] + 1))
import xml.etree.ElementTree as ET
def parse_rec(filename):
  """Read a PASCAL VOC annotation xml file.

  Returns a list with one dict per <object> element, holding the keys
  'name', 'pose', 'truncated' (int), 'difficult' (int) and
  'bbox' ([xmin, ymin, xmax, ymax] as ints).
  """
  def _to_record(node):
    # Convert a single <object> element into its dict form.
    box = node.find('bndbox')
    return {
      'name': node.find('name').text,
      'pose': node.find('pose').text,
      'truncated': int(node.find('truncated').text),
      'difficult': int(node.find('difficult').text),
      'bbox': [int(box.find(corner).text)
               for corner in ('xmin', 'ymin', 'xmax', 'ymax')],
    }

  return [_to_record(node) for node in ET.parse(filename).findall('object')]
def voc_ap(rec, prec, use_07_metric=False):
  """ ap = voc_ap(rec, prec, [use_07_metric])
  Average precision from recall/precision arrays.

  With use_07_metric the VOC07 11-point method is used: the mean, over the
  recall levels 0.0, 0.1, ..., 1.0, of the best precision reached at or
  above each level. Otherwise (the default) the area under the monotone
  precision envelope is returned.
  """
  if use_07_metric:
    # 11 point metric
    total = 0.
    for level in np.arange(0., 1.1, 0.1):
      reached = rec >= level
      # a recall level that is never reached contributes zero precision
      best = np.max(prec[reached]) if np.sum(reached) != 0 else 0
      total = total + best / 11.
    return total

  # pad both curves with sentinel values at the two ends
  recall = np.concatenate(([0.], rec, [1.]))
  envelope = np.concatenate(([0.], prec, [0.]))

  # sweep right-to-left so every entry becomes the maximum precision
  # found at that position or anywhere to its right
  for right in reversed(range(1, envelope.size)):
    envelope[right - 1] = np.maximum(envelope[right - 1], envelope[right])

  # positions where the recall value changes
  steps = np.where(recall[1:] != recall[:-1])[0]

  # sum (\Delta recall) * prec over those positions
  return np.sum((recall[steps + 1] - recall[steps]) * envelope[steps + 1])

In [None]:
# Dump the detections to per-class text files. The full image paths from
# val_img_list are written as the image identifiers; voc_eval later strips
# them back down to the bare image id.
write_voc_results_file(pascal_classes, all_boxes, val_img_list)

# Run the evaluation

In [None]:
def python_eval(output_dir='output'):
    """Evaluate every non-background class and print the AP summary.

    For each class in PASCAL_CLASSES (except '__background__') this runs
    voc_eval on the results file given by get_voc_results_file_template,
    prints the class AP and pickles {'rec', 'prec', 'ap'} to
    <output_dir>/<class>_pr.pkl. Afterwards the mean AP and a per-class
    listing are printed.

    output_dir: directory for the precision/recall pickles; created when
        it does not exist.
    """
    voc_root = os.path.join(DEVKIT_PATH, 'VOC2007')
    annopath = os.path.join(voc_root, 'Annotations', '{:s}.xml')
    imagesetfile = os.path.join(voc_root, 'ImageSets', 'Main', SET + '.txt')
    cachedir = os.path.join(DEVKIT_PATH, 'annotations_cache')

    # The PASCAL VOC metric changed in 2010; the older VOC07 11-point
    # metric is not used here.
    use_07_metric = False
    print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    aps = []
    for cls in (name for name in PASCAL_CLASSES if not name == '__background__'):
        rec, prec, ap = voc_eval(
            get_voc_results_file_template(cls), annopath, imagesetfile, cls,
            cachedir, ovthresh=0.5, use_07_metric=use_07_metric)
        aps.append(ap)
        print('AP for {} = {:.4f}'.format(cls, ap))
        pr_file = os.path.join(output_dir, cls + '_pr.pkl')
        with open(pr_file, 'wb') as f:
            pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)

    print('Mean AP = {:.4f}'.format(np.mean(aps)))
    print('~~~~~~~~')
    print('Results:')
    for class_ap in aps:
        print('{:.3f}'.format(class_ap))
    print('{:.3f}'.format(np.mean(aps)))
    print('~~~~~~~~')

    closing_banner = (
        '',
        '--------------------------------------------------------------',
        'Results computed with the **unofficial** Python eval code.',
        'Results should be very close to the official MATLAB eval code.',
        'Recompute with `./tools/reval.py --matlab ...` for your paper.',
        '-- Thanks, The Management',
        '--------------------------------------------------------------',
    )
    for banner_line in closing_banner:
        print(banner_line)

In [None]:
# Evaluate the detection files written earlier: prints the AP of every class
# and the mean AP, and saves the precision/recall data as pickles under
# "output" (the default output_dir).
python_eval()