In [1]:
import os
import random
import argparse
import time
import math
import numpy as np

import torch
import torch.optim as optim
import torch.backends.cudnn as cudnn

from data import *
import tools

from utils.augmentations import SSDAugmentation
from utils.cocoapi_evaluator import COCOAPIEvaluator
from utils.vocapi_evaluator import VOCAPIEvaluator

In [6]:
# Evaluation configuration for the PASCAL VOC dataset.
# NOTE(review): VOC_ROOT is an absolute, machine-specific path; adjust it for your environment.
VOC_ROOT = '/Users/lan/Downloads/VOCdevkit'
data_dir = VOC_ROOT
num_classes = 20  # equals the number of entries in VOC_CLASSES below
train_size = 416  # passed to the model as input_size
val_size = 416  # passed to the evaluator as img_size and to BaseTransform
VOC_CLASSES = (  # passed to the evaluator as labelmap; presumably tuple position == class index (verify)
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor')
device = 'cpu'

In [7]:
# Build the VOC evaluator that the remaining cells step through by hand.
evaluator = VOCAPIEvaluator(
    data_root=data_dir,
    img_size=val_size,  # val_size is 416, i.e. images are evaluated at 416x416
    device=device,
    # At prediction time only the basic transform (normalisation, per the original
    # author's note) is applied, unlike the training-time augmentation.
    transform=BaseTransform(val_size),
    labelmap=VOC_CLASSES,
)

In [13]:
from models.yolo import myYOLO
# Build the network and load the weights stored in 'yolo_150.pth' onto `device`.
yolo_net = myYOLO(device, input_size=train_size, num_classes=num_classes, trainable=True)
model = yolo_net
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
model.load_state_dict(torch.load('yolo_150.pth', map_location=device))
model.eval()
# Dummy final statement; presumably here so the cell does not echo the value returned by model.eval().
a=1

In [16]:
# Alias the evaluator as `self` so that code written as evaluator method bodies
# (using self.xxx) can be run cell by cell at notebook top level.
self = evaluator
# Number of samples in the evaluator's dataset.
num_images = len(self.dataset)
num_images

4952

In [20]:
# Detection store indexed as all_boxes[class_index][image_index]; every slot starts as
# an empty list and is overwritten with an array of detections later on.
self.all_boxes = [[[] for _ in range(num_images)] for _ in range(len(self.labelmap))]
# Sanity check: (number of classes, number of images).
len(self.all_boxes), len(self.all_boxes[0])

(20, 4952)

In [21]:
self.output_dir

'voc_eval/test'

In [22]:
# Path of the pickle file that will hold every collected detection.
det_file = os.path.join(self.output_dir, 'detections.pkl')

In [32]:
# Turn off the model's `trainable` flag; per the author's comments further down,
# the forward pass then acts as a predictor.
model.trainable = False

In [35]:
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from data import VOCDetection
import sys
import os
import time
import numpy as np
import pickle
# Pull the first sample so the body of the evaluation loop can be stepped through by hand.
im, gt, h, w = self.dataset.pull_item(0)
# Insert a leading batch dimension and move the input to the evaluator's device.
x = Variable(im.unsqueeze(0)).to(self.device)
# Start of the detection timer; the matching stop lives in a later cell.
t0 = time.time()

In [38]:
# Run a single forward pass in prediction mode on the sample prepared above.
model.trainable = False
net = model
bboxes, scores, cls_inds = net(x) # per the original author's note, these bboxes have already been through NMS

In [40]:
# Elapsed time since t0. NOTE(review): t0 was set in an earlier cell and the forward pass
# ran in another one, so this figure also includes the time spent between cell executions.
detect_time = time.time() - t0
detect_time

56.23599410057068

In [47]:
scale = np.array([[w, h, w, h]])  # width/height of the original image, not 416
scale

array([[353, 500, 353, 500]])

In [48]:
# Scale the boxes by the original image size (presumably from normalised coordinates
# to pixels; verify). NOTE(review): this multiplies in place, so re-running the cell
# scales the boxes a second time.
bboxes *= scale 

In [69]:
# Positions of the detections whose class index is 14 ('person' in VOC_CLASSES).
inds = np.where(cls_inds == 14)[0]
inds

array([0, 1, 2])

In [70]:
# Boxes and scores restricted to the selected class.
c_bboxes = bboxes[inds]
c_scores = scores[inds]
c_bboxes.shape, c_scores.shape

((3, 4), (3,))

In [75]:
# Append the scores as a fifth column next to the four box coordinates and
# store the result as float32 (no copy when the dtype already matches).
c_dets = np.hstack((c_bboxes, c_scores[:, None])).astype(np.float32, copy=False)
c_dets.shape

(3, 5)

In [77]:
# Store the class-14 detections for image 0.
self.all_boxes[14][0] = c_dets

In [78]:
# Run the detector over every image of the dataset and collect per-class detections.
# When the loop finishes, self.all_boxes[j][i] is a float32 array with one row per
# detection of class j in image i: the four box coordinates followed by the score
# (shape (0, 5) when there is no detection).
#
# Inference runs under torch.no_grad() so that no autograd graph is recorded.
# The deprecated torch.autograd.Variable wrapper is not needed: wrapping a tensor in
# Variable does not stop gradients; no_grad() (or .detach()) does.
with torch.no_grad():
    for i in range(num_images):  # num_images = 4952 for this dataset
        # Pull the samples out of the dataset one at a time.
        im, gt, h, w = self.dataset.pull_item(i)

        # unsqueeze(0) inserts a dimension at position 0:
        #   im.shape:        torch.Size([3, 416, 416])
        #   im.unsqueeze(0): torch.Size([1, 3, 416, 416])
        x = im.unsqueeze(0).to(self.device)

        t0 = time.time()
        # Forward pass; net.trainable is False at this point, so the network predicts.
        bboxes, scores, cls_inds = net(x)
        detect_time = time.time() - t0

        # scale has shape (1, 4) and broadcasts over the (n, 4) boxes, where n is the
        # number of boxes left after post-processing (e.g. 13).
        scale = np.array([[w, h, w, h]])
        bboxes *= scale

        for j in range(len(self.labelmap)):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                # No detection of this class in this image: store an empty (0, 5) array.
                self.all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])
            ).astype(np.float32, copy=False)
            self.all_boxes[j][i] = c_dets

In [79]:
# Persist every collected detection to det_file using the highest pickle protocol.
with open(det_file, 'wb') as f:
    pickle.dump(self.all_boxes, f, protocol=pickle.HIGHEST_PROTOCOL)

In [80]:
self.output_dir

'voc_eval/test'

In [84]:
self.devkit_path

'/Users/lan/Downloads/VOCdevkitVOC2007'

In [86]:
# Peek at the first dataset id only; leaves im_ind and index bound to that first entry.
for im_ind, index in enumerate(self.dataset.ids):
    print(im_ind, index)
    break

0 ('/Users/lan/Downloads/VOCdevkit/VOC2007', '000001')


In [88]:
# Peek at the first class only; leaves cls_ind and cls bound to that first entry.
for cls_ind, cls in enumerate(self.labelmap):
    print(cls_ind, cls)
    break

0 aeroplane


In [90]:
# Path of the per-class results file for 'aeroplane'.
filename = self.get_voc_results_file_template('aeroplane')
filename

'/Users/lan/Downloads/VOCdevkitVOC2007/results/det_test_aeroplane.txt'

In [94]:
# Detections for the class/image pair left bound by the two peek loops above.
all_boxes = self.all_boxes
dets = all_boxes[cls_ind][im_ind]
dets

array([], shape=(0, 5), dtype=float32)

In [95]:
# Write one results file per class. Each line has the form
#   <image id> <score> <xmin+1> <ymin+1> <xmax+1> <ymax+1>
# e.g. "000001 0.855 27.1 64.7 424.9 219.6".
for cls_ind, cls in enumerate(self.labelmap):
    if self.display:
        print('Writing {:s} VOC results file'.format(cls))
    filename = self.get_voc_results_file_template(cls)
    with open(filename, 'wt') as f:
        for im_ind, index in enumerate(self.dataset.ids):
            dets = all_boxes[cls_ind][im_ind]
            # Skip images without detections. `dets == []` must not be used here:
            # on a NumPy array it is an elementwise comparison, not an emptiness
            # test, and triggers a warning. len() works for both the initial
            # empty list and an empty (0, 5) array.
            if len(dets) == 0:
                continue
            # the VOCdevkit expects 1-based indices
            for k in range(dets.shape[0]):
                f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format(
                    index[1], dets[k, -1],
                    dets[k, 0] + 1, dets[k, 1] + 1,
                    dets[k, 2] + 1, dets[k, 3] + 1))

  if dets == []:


In [96]:
filename

'/Users/lan/Downloads/VOCdevkitVOC2007/results/det_test_tvmonitor.txt'

In [107]:
# Set up the inputs for the evaluation cells that follow.
use_07 = True
use_07_metric = use_07
cachedir = os.path.join(self.devkit_path, 'annotations_cache')
aps = []
print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))

# Peek at the first class only; a later cell reads `cls` from this loop.
for i, cls in enumerate(self.labelmap):
    print(i, cls)
    break

# Results file of the class under inspection (hard-coded to 'aeroplane').
filename = self.get_voc_results_file_template('aeroplane')
filename

VOC07 metric? Yes
0 aeroplane


'/Users/lan/Downloads/VOCdevkitVOC2007/results/det_test_aeroplane.txt'

In [108]:
# Bind the names that the following evaluation cells read.
detpath=filename
classname=cls
# The next line is a self-assignment (no effect).
cachedir=cachedir
ovthresh=0.5
# Self-assignment as well (no effect).
use_07_metric=use_07_metric

In [112]:
# Create the annotation cache directory together with any missing parent directory;
# exist_ok=True makes this a no-op when the directory is already there, so the cell
# can be re-run safely.
os.makedirs(cachedir, exist_ok=True)
# File in which the parsed annotations are cached.
cachefile = os.path.join(cachedir, 'annots.pkl')

In [113]:
self.imgsetpath

'/Users/lan/Downloads/VOCdevkit/VOC2007/ImageSets/Main/test.txt'

In [114]:
# Load the image-set file into a list of lines, line terminators included.
with open(self.imgsetpath, 'r') as f:
    lines = list(f)

In [118]:
len(lines)
# Image ids with the surrounding whitespace / newline stripped.
imagenames = [x.strip() for x in lines]
imagenames[0]
# Only this last expression is echoed: the raw first line, newline included.
lines[0]

'000001\n'

In [119]:
os.path.isfile(cachefile)

False

In [121]:
# Parsed annotations keyed by image name (filled in by later cells).
recs = {}
# Peek at the first image name only; leaves i and imagename bound to the first entry.
for i, imagename in enumerate(imagenames):
    print(i, imagename)
    break

0 000001


In [122]:
self.annopath

'/Users/lan/Downloads/VOCdevkit/VOC2007/Annotations/%s.xml'

In [123]:
# Parse the annotation file of the image name bound by the peek loop above.
recs[imagename] = self.parse_rec(self.annopath % (imagename))

In [124]:
recs[imagename]

[{'name': 'dog',
  'pose': 'Left',
  'truncated': 1,
  'difficult': 0,
  'bbox': [48, 240, 195, 371]},
 {'name': 'person',
  'pose': 'Left',
  'truncated': 1,
  'difficult': 0,
  'bbox': [8, 12, 352, 498]}]

In [125]:
# Parse the annotation file of every image in the image set into recs.
for i, imagename in enumerate(imagenames):
                recs[imagename] = self.parse_rec(self.annopath % (imagename))

In [127]:
# Cache the parsed annotations on disk.
with open(cachefile, 'wb') as f:
                pickle.dump(recs, f)
cachefile

'/Users/lan/Downloads/VOCdevkitVOC2007/annotations_cache/annots.pkl'

In [133]:
classname

'aeroplane'

In [134]:
# Per-image ground truth for the class under evaluation, and the positive counter.
class_recs = {}
npos = 0
#imagenames
# Peek at the first image name only.
for imagename in imagenames:
    print(imagename)
    break


# Objects annotated in image '000001' whose name matches classname.
R = [obj for obj in recs['000001'] if obj['name'] == classname]

000001


In [135]:
# Boxes of the selected objects; an empty array when R is empty.
bbox = np.array([x['bbox'] for x in R])
bbox

array([], dtype=float64)

In [136]:
# Boolean "difficult" flag of each selected object.
# Uses the builtin bool: the np.bool alias is deprecated since NumPy 1.20 and no longer
# exists in recent NumPy releases.
difficult = np.array([x['difficult'] for x in R]).astype(bool)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  difficult = np.array([x['difficult'] for x in R]).astype(np.bool)


In [140]:
# Build the per-image ground truth for `classname` and count its positives.
# Both accumulators are reset here so that re-running this cell does not count
# the positives twice.
class_recs = {}
npos = 0
for imagename in imagenames:
    # recs maps an image name to the list of its annotated objects (each a dict);
    # keep only the objects whose 'name' is the class under evaluation.
    R = [obj for obj in recs[imagename] if obj['name'] == classname]
    bbox = np.array([x['bbox'] for x in R])
    # Builtin bool instead of np.bool (deprecated since NumPy 1.20, later removed).
    difficult = np.array([x['difficult'] for x in R]).astype(bool)
    det = [False] * len(R)  # e.g. [False] * 2 -> [False, False]
    # Only the objects that are not flagged as difficult count as positives.
    npos = npos + int((~difficult).sum())
    class_recs[imagename] = {'bbox': bbox,
                             'difficult': difficult,
                             'det': det}

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  difficult = np.array([x['difficult'] for x in R]).astype(np.bool)


In [151]:
class_recs['000015']

{'bbox': array([], dtype=float64),
 'difficult': array([], dtype=bool),
 'det': []}

In [154]:
# Resolve the detections file for this class. NOTE(review): the detpath echoed earlier
# contains no '{}' placeholder, so format() presumably returns it unchanged.
detfile = detpath.format(classname)
detfile
# Read every detection line of the class.
with open(detfile, 'r') as f:
            lines = f.readlines()
            

In [157]:
len(lines), npos

(911, 285)

In [159]:
# Split each detection line into its space-separated fields.
splitlines = [x.strip().split(' ') for x in lines]
splitlines[0]

['000067', '0.855', '26.1', '63.7', '423.9', '218.6']

In [161]:
# First field of every detection line: the image id.
image_ids = [x[0] for x in splitlines]
image_ids[0]

'000067'

In [163]:
# Second field of every detection line: the score, as a float array.
confidence = np.array([float(x[1]) for x in splitlines])
confidence[0]

0.855

In [165]:
# Remaining fields of every detection line: the four box coordinates, one row per detection.
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
BB[0]

array([ 26.1,  63.7, 423.9, 218.6])

In [169]:
# Order the detections by decreasing confidence (argsort of the negated scores).
sorted_ind = np.argsort(-confidence)
# NOTE(review): this holds the *negated* scores in ascending order and is not read
# by any cell visible here.
sorted_scores = np.sort(-confidence)
# Reorder the boxes and the image ids to match.
BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind]

In [177]:
# Number of detections for this class.
nd = len(image_ids)
nd
# One zero-initialised slot per detection in each of tp and fp.
tp = np.zeros(nd)
tp.shape
fp = np.zeros(nd)

In [179]:
# Inspect the highest-confidence detection (index 0 after sorting).
d=0
# NOTE: R is rebound here from a list of objects to the per-image ground-truth dict.
R = class_recs[image_ids[d]]

In [180]:
image_ids[d]

'000968'

In [188]:
# Ground-truth record of the image that detection d belongs to.
R = class_recs[image_ids[d]]
R
# Running maximum, initialised to -inf.
ovmax = -np.inf
# Ground-truth boxes of that image as floats.
BBGT = R['bbox'].astype(float)
BBGT

array([[ 17.,  64., 472., 231.]])

In [189]:
BBGT.size > 0

True

In [183]:
# Box of detection d as floats.
bb = BB[d, :].astype(float)
bb

array([378.8, 172.4, 488.9, 225.2])