In [1]:
%matplotlib inline
from refer import REFER
import numpy as np
import sys
import os.path as osp
import json
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

In [2]:
# Dataset selection. data_root contains refclef, refcoco, refcoco+, refcocog and images.
data_root, dataset, splitBy = './data', 'refcoco', 'unc'
# Load the chosen dataset/split; later cells look up refs and annotations through `refer`.
refer = REFER(data_root, dataset, splitBy)

loading dataset refcoco into memory...
creating index...
index created.
DONE (t=6.84s)


# 1. Evaluate Referring Expressions by Language Metrics

In [3]:
from evaluation.refEvaluation import RefEvaluation

In [4]:
# Here's our example expression file.
# Read it inside a `with` block so the file handle is closed as soon as parsing finishes.
with open('test/sample_expressions_testA.json', 'r') as f:
    sample_expr_file = json.load(f)
# The predictions are a list of dicts, each carrying a 'sent' and a 'ref_id'.
sample_exprs = sample_expr_file['predictions']
print(sample_exprs[0])

{'sent': 'man in black', 'ref_id': 47}


In [5]:
# Build the evaluator from the loaded REFER object and the sample predictions, then run it.
# NOTE(review): the recorded output of this cell ends with KeyboardInterrupt, so the
# saved run did not finish -- re-run to completion before relying on any results.
refEval = RefEvaluation(refer, sample_exprs)
refEval.evaluate()

tokenization...


PTBTokenizer tokenized 6983 tokens at 59805.13 tokens per second.


----lines: b'man in black\nperson on right\nwoman in red\ncar behind bike\ncar on left\nman in blue\nman in white\nleft person\nman on right\nperson in background\nperson on left\nman in white\nguy in white\nguy in red\nwhite shirt\nplayer in white\nred shirt\ngirl\nbaby\nbaby\nwoman in front\ngirl\nright guy\nman in white\nmiddle guy\nwoman\nman on right\nwoman\nman in white\nman in white shirt\nwoman on right\nman in red\nwoman in pink\ngirl in pink\nmiddle guy\nsecond from right\nleft guy\nwhite jacket\nright guy\nblue jacket\nman in white shirt\nman\nman in back\nleft guy\nwoman on right\nwoman on right\nleft guy\nwoman on right\nman in black shirt\nman\nman\nright guy\nleft guy\nman in front\nman on left\nperson on right\nperson in front\nman in black\nman in front\nright skier\nperson in front\nsecond from left\nman on left\nsecond from right\nleft guy\nwoman on right\ngirl on right\nman on right\nman in front of man in white shirt\nwoman in white shirt\nman in black\ngroom\nbrid

PTBTokenizer tokenized 25044 tokens at 180725.75 tokens per second.


----lines: b'person bottom left\nleft black shirt\nman on right\nright man\nman sitting on bench\nperson on left of bench\nperson on left yellow boots\nperson left\nblue car right\nblue car\nwhite car left\nwhite car\nleft car\nman on far left on screen\nman left cut off\nman walking out of picture\nmain guy on the tv\nman front center\nseated man\nwoman in blue\nwomen in blue\nleft\nguy on right\nright guy\nguy\nlady middle pink\nwoman standing inbetween the two guys\nthe old lady in between the players\nguy on left of screen red shirt\nguy in back left red shirt\nguy in red on left\nleft guy organge\nguy on the left with the bat\norange\nbaseball batter on the right\nblue shirt\nright player\nbody of guy in back\nhalf of a person\nwere no good at this today lolguy top\nplayer number 8\nnumber 8\nblack shorts\n2\nstanding kid\nupper kid\nkid standing\nfront kid\nkid down\nboy sitting\nbaby holding toy\nright baby\nleft kid\nbaby on left\nwet hair\nlady gettin hair cut\nclient\nwoman b

KeyboardInterrupt: 

# 2. Evaluate Referring Expressions by Duplicate Rate

In [None]:
# Evaluate how many images contain duplicate predicted expressions.
# Map each predicted ref_id to its sentence (a repeated ref_id keeps the last sentence seen).
pred_refToSent = {int(it['ref_id']): it['sent'] for it in sample_exprs}
# Group the predicted sentences by the image their ref belongs to.
pred_imgToSents = {}
for ref_id, pred_sent in pred_refToSent.items():
    image_id = refer.Refs[ref_id]['image_id']
    # append in place instead of rebuilding the list on every iteration
    pred_imgToSents.setdefault(image_id, []).append(pred_sent)
# An image counts as a duplicate when at least two of its predicted sentences are identical.
duplicate = 0
for image_id, sents in pred_imgToSents.items():
    if len(set(sents)) < len(sents):
        duplicate += 1
# Guard against an empty prediction list, which would otherwise divide by zero.
ratio = duplicate*100.0 / len(pred_imgToSents) if pred_imgToSents else 0.0
# print() as a function: the Python 2 print statement is a SyntaxError on a Python 3 kernel.
print('%s/%s (%.2f%%) images have duplicate predicted sentences.' % (duplicate, len(pred_imgToSents), ratio))

# 3. Evaluate Referring Comprehension

In [None]:
# IoU function
def computeIoU(box1, box2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Args:
        box1: box given as [x1, y1, w, h] (top-left corner, width, height).
        box2: box in the same [x1, y1, w, h] format.

    Returns:
        float: intersection area divided by union area. Returns 0.0 when the
        boxes do not overlap, or when the union area is not positive (for
        example, both boxes have zero area).
    """
    # Overlap extent along each axis, clamped at zero when the boxes are disjoint.
    # Areas are measured as w*h throughout, so the intersection and the union
    # use the same convention and thin overlaps (<= 1 unit) are not dropped.
    inter_w = max(0, min(box1[0]+box1[2], box2[0]+box2[2]) - max(box1[0], box2[0]))
    inter_h = max(0, min(box1[1]+box1[3], box2[1]+box2[3]) - max(box1[1], box2[1]))
    inter = inter_w * inter_h

    union = box1[2]*box1[3] + box2[2]*box2[3] - inter
    if union <= 0:
        # degenerate boxes: avoid ZeroDivisionError
        return 0.0
    return float(inter)/union

In [None]:
# pick one ref at random from all ref ids
ref_ids = refer.getRefIds()
ref_id = ref_ids[np.random.randint(len(ref_ids))]
ref = refer.Refs[ref_id]

# fake a predicted bounding box: take a random instance from the same image
image_id = ref['image_id']
anns = refer.imgToAnns[image_id]
ann = anns[np.random.randint(len(anns))]

In [None]:
# show the ref with its own box (drawn by showRef, in 'green')
plt.figure()
refer.showRef(ref, seg_box='box')
# overlay the sampled ann's [x, y, w, h] box as an unfilled 'red' rectangle
bbox = ann['bbox']
box_plot = Rectangle(
    (bbox[0], bbox[1]), bbox[2], bbox[3],
    fill=False, edgecolor='red', linewidth=2,
)
ax = plt.gca()
ax.add_patch(box_plot)
plt.show()

In [None]:
# Is the ann actually our ref?
# i.e., IoU >= 0.5?
# Compare the ref's ground-truth box with the sampled ann's box.
ref_box = refer.refToAnn[ref_id]['bbox']
ann_box = ann['bbox']
IoU = computeIoU(ref_box, ann_box)
# print() as a function: the Python 2 print statement is a SyntaxError on a Python 3 kernel.
if IoU >= 0.5:
    print('IoU=[%.2f], correct comprehension!' % IoU)
else:
    print('IoU=[%.2f], wrong comprehension!' % IoU)