In [1]:
#encoding:utf-8
#
#created by xiongzihua
#
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import numpy as np
# The 20 PASCAL VOC object category names. The integer class label read
# from the annotation list files is used directly as an index into this tuple.
VOC_CLASSES = (
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
)

# Drawing palette: one three-channel colour per row. It has 21 rows, i.e.
# one more than len(VOC_CLASSES).
# NOTE(review): channel order (RGB vs. BGR) is not established here - confirm before use.
Color = [
    [0, 0, 0],      [128, 0, 0],     [0, 128, 0],
    [128, 128, 0],  [0, 0, 128],     [128, 0, 128],
    [0, 128, 128],  [128, 128, 128], [64, 0, 0],
    [192, 0, 0],    [64, 128, 0],    [192, 128, 0],
    [64, 0, 128],   [192, 0, 128],   [64, 128, 128],
    [192, 128, 128], [0, 64, 0],     [128, 64, 0],
    [0, 192, 0],    [128, 192, 0],   [0, 64, 128],
]

In [2]:
def voc_ap(rec,prec,use_07_metric=False):
    '''
    Compute average precision (AP) from a recall/precision curve.

    rec, prec: equal-length 1-D sequences (lists or numpy arrays) holding
        recall and precision after each ranked detection.
    use_07_metric: if True, use the 11-point interpolated metric
        (mean of the max precision at recall >= 0.0, 0.1, ..., 1.0);
        otherwise integrate the monotonically decreasing precision
        envelope over recall.

    Returns the AP as a float. Empty inputs yield 0.
    '''
    # Accept plain Python sequences as well as arrays: the boolean-mask
    # indexing below only works on numpy arrays.
    rec = np.asarray(rec, dtype=float)
    prec = np.asarray(prec, dtype=float)

    if use_07_metric:
        # 11 point metric
        ap = 0.
        for t in np.arange(0.,1.1,0.1):
            reached = rec >= t
            # max precision among points whose recall is at least t (0 if none)
            p = np.max(prec[reached]) if np.any(reached) else 0.
            ap = ap + p/11.

    else:
        # correct ap calculation
        # sentinel values at both ends so the curve spans recall 0..1
        mrec = np.concatenate(([0.],rec,[1.]))
        mpre = np.concatenate(([0.],prec,[0.]))

        # precision envelope: each entry becomes the max of itself and
        # everything to its right (running max taken from the right end)
        mpre = np.maximum.accumulate(mpre[::-1])[::-1]

        # indices where recall changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # sum of (delta recall) * precision
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])

    return ap

In [3]:
def _box_iou(bb, bbgt):
    '''Intersection-over-union of two [x1,y1,x2,y2] boxes, using the inclusive-pixel (+1) convention.'''
    # intersection rectangle
    ixmin = np.maximum(bbgt[0], bb[0])
    iymin = np.maximum(bbgt[1], bb[1])
    ixmax = np.minimum(bbgt[2], bb[2])
    iymax = np.minimum(bbgt[3], bb[3])
    iw = np.maximum(ixmax - ixmin + 1., 0.)
    ih = np.maximum(iymax - iymin + 1., 0.)
    inters = iw * ih

    union = (bb[2]-bb[0]+1.)*(bb[3]-bb[1]+1.) + (bbgt[2]-bbgt[0]+1.)*(bbgt[3]-bbgt[1]+1.) - inters
    if union <= 0:
        # degenerate boxes: report no overlap instead of dividing by zero
        return 0.
    return inters/union


def voc_eval(preds,target,VOC_CLASSES=VOC_CLASSES,threshold=0.5,use_07_metric=False,):
    '''
    Compute per-class AP and the mean AP of a set of detections.

    preds {'cat':[[image_id,confidence,x1,y1,x2,y2],...],'dog':[[],...]}
    target {(image_id,class):[[x1,y1,x2,y2],...]}
    VOC_CLASSES: iterable of class names to evaluate, in reporting order.
    threshold: a detection matches a ground-truth box when IoU > threshold.
    use_07_metric: forwarded to voc_ap (11-point metric when True).

    Prints one line per class plus the mean AP, and returns the mean AP.
    `target` is not modified, so the same ground truth can be evaluated
    repeatedly. A class without any detection is reported with the
    sentinel AP -1.
    NOTE(review): as before, that -1 sentinel is included in the mean;
    decide whether such classes should count as 0 or be excluded.
    '''
    aps = []
    for class_ in VOC_CLASSES:
        pred = preds.get(class_, []) #[[image_id,confidence,x1,y1,x2,y2],...]
        if len(pred) == 0: # abnormal case: not a single detection for this class
            ap = -1
            print('---class {} ap {}---'.format(class_,ap))
            aps += [ap]
            # move on to the next class; the remaining classes must still be evaluated
            continue
        image_ids = [x[0] for x in pred]
        confidence = np.array([float(x[1]) for x in pred])
        BB = np.array([x[2:] for x in pred])
        # sort by confidence, highest first
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

        # number of ground-truth boxes of this class over all images;
        # counted from target so images without detections are included
        npos = sum(len(boxes) for (_, key2), boxes in target.items() if key2 == class_)

        # go down dets and mark TPs and FPs
        nd = len(image_ids)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        # image_id -> per-ground-truth "already matched" flags. Kept locally
        # so that the caller's target dict is left untouched.
        matched = {}
        for d,image_id in enumerate(image_ids):
            bb = BB[d] # predicted box
            BBGT = target.get((image_id,class_))
            if not BBGT:
                # no ground truth of this class in this image
                fp[d] = 1
                continue
            used = matched.setdefault(image_id, [False]*len(BBGT))
            for k,bbgt in enumerate(BBGT):
                if used[k]:
                    # each ground-truth box may be matched only once
                    continue
                if _box_iou(bb, bbgt) > threshold:
                    tp[d] = 1
                    used[k] = True
                    break
            fp[d] = 1-tp[d]
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        eps = np.finfo(np.float64).eps
        # guard both denominators: npos is 0 when the class has detections
        # but no ground truth
        rec = tp/max(float(npos), eps)
        prec = tp/np.maximum(tp + fp, eps)
        ap = voc_ap(rec, prec, use_07_metric)
        print('---class {} ap {}---'.format(class_,ap))
        aps += [ap]
    # np.mean of an empty list warns; report nan directly in that case
    mean_ap = np.mean(aps) if aps else float('nan')
    print('---map {}---'.format(mean_ap))
    return mean_ap

def test_eval():
    '''Smoke-run voc_eval on a tiny hand-written two-class (cat/dog) example.'''
    # detections: [image_id, confidence, x1, y1, x2, y2]
    cat_dets = [
        ['image01',0.9,20,20,40,40],
        ['image01',0.8,20,20,50,50],
        ['image02',0.8,30,30,50,50],
    ]
    dog_dets = [
        ['image01',0.78,60,60,90,90],
    ]
    preds = {'cat':cat_dets,'dog':dog_dets}

    # ground truth: (image_id, class) -> list of [x1, y1, x2, y2]
    target = {
        ('image01','cat'):[[20,20,41,41]],
        ('image01','dog'):[[60,60,91,91]],
        ('image02','cat'):[[30,30,51,51]],
    }

    voc_eval(preds,target,VOC_CLASSES=['cat','dog'])

if __name__ == '__main__':
    #test_eval()
    # NOTE(review): the wildcard import is presumably what brings vgg16, nn,
    # torch and predict_gpu (used below) into scope - replace it with
    # explicit names once the exports of predict are confirmed.
    from predict import *
    from collections import defaultdict
    from tqdm import tqdm

    target =  defaultdict(list) # (image_id, class_name) -> [[x1,y1,x2,y2], ...]
    preds = defaultdict(list)   # class_name -> [[image_id,prob,x1,y1,x2,y2], ...]
    image_list = [] #image path list

    # Annotation list: one image per line, whitespace separated:
    #   image_id num_obj, then num_obj groups of "x1 y1 x2 y2 class_index"
    with open('voc2012train.txt') as f:
        lines = f.readlines()
    file_list = []
    for line in lines:
        splited = line.strip().split()
        if splited: # ignore blank lines instead of failing on them later
            file_list.append(splited)

    print('---prepare target---')
    for image_file in tqdm(file_list):
        image_id = image_file[0]
        image_list.append(image_id)
        num_obj = int(image_file[1])
        for i in range(num_obj):
            # five integer fields per object, starting after the two header fields
            x1, y1, x2, y2, c = (int(v) for v in image_file[2+5*i:7+5*i])
            class_name = VOC_CLASSES[c]
            target[(image_id,class_name)].append([x1,y1,x2,y2])
    #
    #start test
    #
    print('---start test---')
    model = vgg16(pretrained=False)
    # Replace the classifier head so the network emits 1470 values per image.
    # NOTE(review): 1470 presumably equals 7*7*30 (YOLO-style grid output) - confirm.
    model.classifier = nn.Sequential(
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, 1470),
            )
    model.load_state_dict(torch.load('best.pth'))
    model.eval()
    model.cuda()
    for image_path in tqdm(image_list):
        # each result entry unpacks as ((x1,y1),(x2,y2),class_name,image_id,prob)
        result = predict_gpu(model,image_path,root_path='VOC_Datasets/VOC_2012_training/VOC2012/JPEGImages/')
        for (x1,y1),(x2,y2),class_name,image_id,prob in result: #image_id is actually image_path
            preds[class_name].append([image_id,prob,x1,y1,x2,y2])

    print('---start evaluate---')
    voc_eval(preds,target,VOC_CLASSES=VOC_CLASSES)

 29%|██▉       | 4981/17125 [00:00<00:00, 49802.09it/s]

---prepare target---


100%|██████████| 17125/17125 [00:00<00:00, 80314.98it/s]


---start test---


100%|██████████| 17125/17125 [56:19<00:00,  5.07it/s]


---start evaluate---
---class aeroplane ap 0.579636768146---
---class bicycle ap 0.258959356639---
---class bird ap 0.459492122384---
---class boat ap 0.165528609461---
---class bottle ap 0.101808625011---
---class bus ap 0.379867176045---
---class car ap 0.214031003989---
---class cat ap 0.72977170081---
---class chair ap 0.110499764521---
---class cow ap 0.195263508003---
---class diningtable ap 0.194013161639---
---class dog ap 0.61706553645---
---class horse ap 0.309497649857---
---class motorbike ap 0.299944176092---
---class person ap 0.434832707718---
---class pottedplant ap 0.0425749939105---
---class sheep ap 0.195225282748---
---class sofa ap 0.134806261539---
---class train ap 0.528307774336---
---class tvmonitor ap 0.22510335636---
---map 0.308811476783---


In [4]:
# Evaluate the same checkpoint on the list in voc2007test.txt.
# This cell imports nothing itself: defaultdict, tqdm, vgg16, nn, torch and
# predict_gpu must already be in scope from an earlier cell.
target =  defaultdict(list) # (image_id, class_name) -> [[x1,y1,x2,y2], ...]
preds = defaultdict(list)   # class_name -> [[image_id,prob,x1,y1,x2,y2], ...]
image_list = [] #image path list

# Annotation list: one image per line, whitespace separated:
#   image_id num_obj, then num_obj groups of "x1 y1 x2 y2 class_index"
with open('voc2007test.txt') as f:
    lines = f.readlines()
file_list = []
for line in lines:
    splited = line.strip().split()
    if splited: # ignore blank lines instead of failing on them later
        file_list.append(splited)

print('---prepare target---')
for image_file in tqdm(file_list):
    image_id = image_file[0]
    image_list.append(image_id)
    num_obj = int(image_file[1])
    for i in range(num_obj):
        # five integer fields per object, starting after the two header fields
        x1, y1, x2, y2, c = (int(v) for v in image_file[2+5*i:7+5*i])
        class_name = VOC_CLASSES[c]
        target[(image_id,class_name)].append([x1,y1,x2,y2])
#
#start test
#
print('---start test---')
model = vgg16(pretrained=False)
# Replace the classifier head so the network emits 1470 values per image.
# NOTE(review): 1470 presumably equals 7*7*30 (YOLO-style grid output) - confirm.
model.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 1470),
        )
model.load_state_dict(torch.load('best.pth'))
model.eval()
model.cuda()
for image_path in tqdm(image_list):
    # each result entry unpacks as ((x1,y1),(x2,y2),class_name,image_id,prob)
    result = predict_gpu(model,image_path,root_path='VOC_Datasets/VOC_2007_testing/VOCdevkit/VOC2007/JPEGImages/')
    for (x1,y1),(x2,y2),class_name,image_id,prob in result: #image_id is actually image_path
        preds[class_name].append([image_id,prob,x1,y1,x2,y2])

print('---start evaluate---')
voc_eval(preds,target,VOC_CLASSES=VOC_CLASSES)

100%|██████████| 4952/4952 [00:00<00:00, 75249.42it/s]

---prepare target---
---start test---



100%|██████████| 4952/4952 [14:03<00:00,  5.87it/s]


---start evaluate---
---class aeroplane ap 0.370352480334---
---class bicycle ap 0.161536258038---
---class bird ap 0.148747115061---
---class boat ap 0.0346886877337---
---class bottle ap 0.0155894468143---
---class bus ap 0.220445433956---
---class car ap 0.174616652479---
---class cat ap 0.432814373807---
---class chair ap 0.0126173896903---
---class cow ap 0.0323604675808---
---class diningtable ap 0.0678276808754---
---class dog ap 0.311612657387---
---class horse ap 0.0677364187076---
---class motorbike ap 0.104779725192---
---class person ap 0.140374242407---
---class pottedplant ap 0.00580357142857---
---class sheep ap 0.054514300958---
---class sofa ap 0.0562744112173---
---class train ap 0.321907525909---
---class tvmonitor ap 0.0486455419197---
---map 0.139162219075---
