# 检查QPIC提供的数据集和原mat数据集是否相同

In [1]:
from pprint import pprint
import json

In [23]:
def _image_matches(mat_item, tgt_item):
    """Return True if every tgt instance and HOI triplet also exists in mat.

    An instance is identified by its content (bbox coordinates followed by
    ``category_id``), not by its position in the ``annotations`` list, so the
    two items may list their instances in a different order.

    Raises:
        KeyError: if a tgt HOI annotation references a ``subject_id`` or
            ``object_id`` that is not an index into ``tgt_item['annotations']``.
    """
    # Map each distinct mat instance to the first index at which it occurs,
    # and map every mat index to that canonical index.  Collapsing duplicated
    # instances keeps a relation that points at a later duplicate comparable
    # with a tgt relation that resolves to the first occurrence.
    first_index = {}
    canonical = {}
    for idx, ann in enumerate(mat_item['annotations']):
        key = tuple(ann['bbox'] + [ann['category_id']])
        canonical[idx] = first_index.setdefault(key, idx)

    # Every tgt instance must exist in mat; remember the mat index it maps to.
    ins_tgt2mat = {}
    for idx, ann in enumerate(tgt_item['annotations']):
        key = tuple(ann['bbox'] + [ann['category_id']])
        if key not in first_index:
            return False
        ins_tgt2mat[idx] = first_index[key]

    # Same idea for relations: (subject, object, verb category) triplets,
    # expressed with canonical mat indices.  Ids that are not valid mat
    # indices are kept as they are.
    mat_rel = {
        (canonical.get(ann['subject_id'], ann['subject_id']),
         canonical.get(ann['object_id'], ann['object_id']),
         ann['category_id'])
        for ann in mat_item['hoi_annotations']
    }
    return all(
        (ins_tgt2mat[ann['subject_id']],
         ins_tgt2mat[ann['object_id']],
         ann['category_id']) in mat_rel
        for ann in tgt_item['hoi_annotations']
    )


def check_ann(mat_anns, tgt_anns):
    """List the images whose tgt annotations are not all present in mat.

    The check is one-directional: for every image in ``tgt_anns``, each of its
    instances (bbox + ``category_id``) and each of its HOI triplets
    (subject, object, ``category_id``) must also appear in the entry of
    ``mat_anns`` with the same file name.  Extra annotations that exist only
    in mat do not count as a mismatch.

    Args:
        mat_anns: dict mapping file name -> annotation item with the keys
            ``'annotations'`` and ``'hoi_annotations'``.
        tgt_anns: dict of the same layout that is checked against ``mat_anns``.

    Returns:
        The file names from ``tgt_anns`` that failed the check, in iteration
        order.  An image that is missing from ``mat_anns`` is reported as a
        mismatch.  Summary counts are also printed.

    Raises:
        KeyError: if a tgt HOI annotation references an instance index that
            does not exist in that image's ``'annotations'`` list.
    """
    missmatch_files = []
    for file_name, tgt_item in tgt_anns.items():
        mat_item = mat_anns.get(file_name)
        if mat_item is None or not _image_matches(mat_item, tgt_item):
            missmatch_files.append(file_name)

    print('num mat img:', len(mat_anns))
    print('num tgt img:', len(tgt_anns))
    print('num missmatch:', len(missmatch_files))
    return missmatch_files

## train
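显然不一样：QPIC 的标注被聚合过（下例中 mat 的 4 组人-物框在 QPIC 中被合并成 1 组，4 个 HOI 都指向同一对框），并且下例中 QPIC 的框坐标比 mat 第一组框的坐标各小 1。这应当就是逐框精确比较时所有训练图片都不匹配的原因。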

In [20]:
# Load the raw and the QPIC training annotations.  The files are opened with
# `with` so the handles are closed as soon as parsing finishes.
with open('../../annotations/raw/train.json') as fp:
    mat_anns = json.load(fp)
with open('../../annotations/qpic/train_qpic.json') as fp:
    qpic_anns = json.load(fp)

In [21]:
# Re-key both annotation collections by image file name so that the same image
# can be looked up directly in either set.
mat_anns = {record['file_name']: record for record in mat_anns}
qpic_anns = {record['file_name']: record for record in qpic_anns}

In [7]:
# Show the raw annotation of one training image, keeping the original key order.
pprint(mat_anns['HICO_train2015_00000001.jpg'], compact=True, width=100, sort_dicts=False)

{'file_name': 'HICO_train2015_00000001.jpg',
 'img_id': 1,
 'width': 640,
 'height': 480,
 'annotations': [{'bbox': [208, 33, 427, 300], 'category_id': 1},
                 {'bbox': [59, 98, 572, 405], 'category_id': 4},
                 {'bbox': [213, 20, 438, 357], 'category_id': 1},
                 {'bbox': [77, 115, 583, 396], 'category_id': 4},
                 {'bbox': [206, 33, 427, 306], 'category_id': 1},
                 {'bbox': [61, 100, 571, 401], 'category_id': 4},
                 {'bbox': [209, 26, 444, 317], 'category_id': 1},
                 {'bbox': [59, 99, 579, 395], 'category_id': 4}],
 'hoi_annotations': [{'subject_id': 0, 'object_id': 1, 'category_id': 73, 'hoi_category_id': 153},
                     {'subject_id': 2, 'object_id': 3, 'category_id': 77, 'hoi_category_id': 154},
                     {'subject_id': 4, 'object_id': 5, 'category_id': 88, 'hoi_category_id': 155},
                     {'subject_id': 6, 'object_id': 7, 'category_id': 99, 'hoi_categor

In [9]:
# Show the QPIC annotation of the same training image for a side-by-side comparison.
pprint(qpic_anns['HICO_train2015_00000001.jpg'], compact=True, width=100, sort_dicts=False)

{'file_name': 'HICO_train2015_00000001.jpg',
 'img_id': 0,
 'annotations': [{'bbox': [207, 32, 426, 299], 'category_id': 1},
                 {'bbox': [58, 97, 571, 404], 'category_id': 4}],
 'width': 640,
 'height': 480,
 'hoi_annotations': [{'subject_id': 0, 'object_id': 1, 'category_id': 73, 'hoi_category_id': 153},
                     {'subject_id': 0, 'object_id': 1, 'category_id': 77, 'hoi_category_id': 154},
                     {'subject_id': 0, 'object_id': 1, 'category_id': 88, 'hoi_category_id': 155},
                     {'subject_id': 0, 'object_id': 1, 'category_id': 99, 'hoi_category_id': 156}]}


In [24]:
# Check every QPIC training image against the raw annotations; the result is
# the list of file names that do not match.
train_missmatch = check_ann(mat_anns, qpic_anns)

num mat img: 37633
num tgt img: 37633
num missmatch: 37633


## test
一样：标注内容一致（如下例所示，仅实例的排列顺序和 img_id 不同）。

In [10]:
# Load the raw and the QPIC test annotations.  The files are opened with
# `with` so the handles are closed as soon as parsing finishes.
with open('../../annotations/raw/test.json') as fp:
    mat_anns = json.load(fp)
with open('../../annotations/qpic/test_qpic.json') as fp:
    qpic_anns = json.load(fp)

In [11]:
# Re-key both annotation collections by image file name so that the same image
# can be looked up directly in either set.
mat_anns = {record['file_name']: record for record in mat_anns}
qpic_anns = {record['file_name']: record for record in qpic_anns}

In [12]:
# Show the raw annotation of one test image, keeping the original key order.
pprint(mat_anns['HICO_test2015_00000001.jpg'], compact=True, width=100, sort_dicts=False)

{'file_name': 'HICO_test2015_00000001.jpg',
 'img_id': 1,
 'width': 640,
 'height': 427,
 'annotations': [{'bbox': [320, 306, 359, 349], 'category_id': 1},
                 {'bbox': [270, 303, 311, 350], 'category_id': 1},
                 {'bbox': [148, 345, 376, 414], 'category_id': 15}],
 'hoi_annotations': [{'subject_id': 0, 'object_id': 2, 'category_id': 88, 'hoi_category_id': 246},
                     {'subject_id': 1, 'object_id': 2, 'category_id': 88, 'hoi_category_id': 246}]}


In [13]:
# Show the QPIC annotation of the same test image for a side-by-side comparison.
pprint(qpic_anns['HICO_test2015_00000001.jpg'], compact=True, width=100, sort_dicts=False)

{'file_name': 'HICO_test2015_00000001.jpg',
 'annotations': [{'bbox': [320, 306, 359, 349], 'category_id': 1},
                 {'bbox': [148, 345, 376, 414], 'category_id': 15},
                 {'bbox': [270, 303, 311, 350], 'category_id': 1}],
 'width': 640,
 'height': 427,
 'hoi_annotations': [{'subject_id': 0, 'object_id': 1, 'category_id': 88, 'hoi_category_id': 246},
                     {'subject_id': 2, 'object_id': 1, 'category_id': 88, 'hoi_category_id': 246}],
 'img_id': 0}


In [19]:
# Check every QPIC test image against the raw annotations; the result is
# the list of file names that do not match.
test_missmatch = check_ann(mat_anns, qpic_anns)

num missmatch: 0
