In [2]:
import json
from tqdm.autonotebook import tqdm
from evaluate_metrics import compute_f1, compute_avg_bleu

  


In [3]:
with open('./data/test/evjvqa_public_test-lang-qtype-answer.json', 'r', encoding='utf-8') as f:
    test_data = json.load(f)

In [4]:
test_annotations = test_data['annotations']

qid2lang = {}
qid2qtype = {}

for anno in tqdm(test_annotations):
    qid2lang[str(anno['id'])] = anno['language']
    qid2qtype[str(anno['id'])] = anno['question_type']
    
len(test_annotations), len(qid2lang), len(qid2qtype)

  0%|          | 0/5015 [00:00<?, ?it/s]

(5015, 5015, 5015)

In [6]:
with open('./outputs/public-test/groundtruth_results.json', 'r', encoding='utf-8') as f:
    gt_results = json.load(f)

## RUNNING

In [37]:
with open('./outputs/public-test/results-vit-mt5.json', 'r', encoding='utf-8') as f:
    mt5vit_results = json.load(f)

sep_qtype = True
languages = ['en', 'vi', 'ja']
# question_types = list(set(qid2qtype.values()))
question_types = ['HOW_MANY', 'WHAT_COLOR', 'WHERE', 'WHO', 'HOW', 'WHAT_IS', 'WHAT_DO', 'WHICH', 'OTHERS']

for lang in languages:
    pred = {}
    grth = {}
    print(f'------------------- {lang.upper()} -------------------')
    if sep_qtype:
        for qtype in question_types:
            pred = {}
            grth = {}
            for k, v in mt5vit_results.items():
                if qid2lang[k] == lang and qid2qtype[k] == qtype:
                    pred[k] = v
                    grth[k] = gt_results[k]
            
            f1 = compute_f1(a_gold=grth, a_pred=pred)
            bleu = compute_avg_bleu(a_gold=grth, a_pred=pred)
            
            print(f"Metrics of Language={lang} - Question Type={qtype}: F1 = {f1} and Bleu = {bleu}")
    
        print(f'------------------- END -------------------')
    else:
        for k, v in mt5vit_results.items():
            if qid2lang[k] == lang:
                pred[k] = v
                grth[k] = gt_results[k]

        f1 = compute_f1(a_gold=grth, a_pred=pred)
        bleu = compute_avg_bleu(a_gold=grth, a_pred=pred)
            
        print(f"Metrics of Language={lang}: F1 = {f1} and Bleu = {bleu}")

------------------- EN -------------------
Metrics of Language=en - Question Type=HOW_MANY: F1 = 0.15290013522865745 and Bleu = 0.15322208887005995
Metrics of Language=en - Question Type=WHAT_COLOR: F1 = 0.18853855787818052 and Bleu = 0.12518137087061593
Metrics of Language=en - Question Type=WHERE: F1 = 0.19176122196186499 and Bleu = 0.09459589580754617
Metrics of Language=en - Question Type=WHO: F1 = 0.22394857693619305 and Bleu = 0.10707395670936089
Metrics of Language=en - Question Type=HOW: F1 = 0.13667508001418033 and Bleu = 0.05589034318687309
Metrics of Language=en - Question Type=WHAT_IS: F1 = 0.13649668409858795 and Bleu = 0.06596107283250961
Metrics of Language=en - Question Type=WHAT_DO: F1 = 0.09775352735411079 and Bleu = 0.03948120374064543
Metrics of Language=en - Question Type=WHICH: F1 = 0.32174146319366903 and Bleu = 0.24227055915263437
Metrics of Language=en - Question Type=OTHERS: F1 = 0.1526480238108145 and Bleu = 0.09143225672564895
------------------- END -------

  return np.array(scores).mean()
  ret = ret.dtype.type(ret / rcount)


## Prepare training data

In [18]:
with open('./data/train/evjvqa_train_lang_qtype-detailed.json', 'r', encoding='utf-8') as f:
    train_data = json.load(f)
    
train_annotations = train_data['annotations']

In [19]:
subtrain_annotations = []

LIST_QTYPES = ['WHAT_IS', 'WHO', 'WHERE', 'OTHERS', 'WHAT_DO', 'WHICH', 'HOW']

for anno in tqdm(train_annotations):
    if anno['question_type'] in LIST_QTYPES:
        subtrain_annotations.append(anno)
        
print(len(subtrain_annotations))

subtrain_dict = {
    'images': train_data['images'],
    'annotations': subtrain_annotations
}

  0%|          | 0/23785 [00:00<?, ?it/s]

15109


In [20]:
with open('./data/train/evjvqa-subtrain-obj.json', 'w', encoding='utf-8') as f:
    json.dump(subtrain_dict, f, indent=4, ensure_ascii=False)

In [17]:
with open('./data/test/evjvqa_public_test-lang-qtype-answer.json', 'r', encoding='utf-8') as f:
    test_data = json.load(f)
    
test_annotations = test_data['annotations']

subtest_annotations = []

LIST_QTYPES = ['WHAT_IS', 'WHO', 'WHERE', 'OTHERS', 'WHAT_DO', 'WHICH', 'HOW']

for anno in tqdm(test_annotations):
    if anno['question_type'] in LIST_QTYPES:
        subtest_annotations.append(anno)
        
print(len(subtest_annotations))

subtest_dict = {
    'images': test_data['images'],
    'annotations': subtest_annotations
}

with open('./data/test/evjvqa-subtest-obj.json', 'w', encoding='utf-8') as f:
    json.dump(subtest_dict, f, indent=4, ensure_ascii=False)

  0%|          | 0/5015 [00:00<?, ?it/s]

3345


In [37]:
with open('./data/train/evjvqa_train_lang_qtype-desc-detailed.json', 'r', encoding='utf-8') as f:
    train_data = json.load(f)
    
train_images = train_data['images']
train_annotations = train_data['annotations']

In [40]:
id2desc = {}

for img_desc in tqdm(train_images):
    id2desc[img_desc['id']] = img_desc['desc'][0]

for anno in tqdm(train_annotations):
    anno['desc'] = id2desc[anno['image_id']]

  0%|          | 0/3763 [00:00<?, ?it/s]

  0%|          | 0/23785 [00:00<?, ?it/s]

In [42]:
with open('./data/train/evjvqa_train_lang_qtype-desc-detailed.json', 'w', encoding='utf-8') as f:
    json.dump(train_data, f, indent=4, ensure_ascii=False)

In [43]:
with open('./data/test/evjvqa_public_test-lang-qtype-desc-answer.json', 'r', encoding='utf-8') as f:
    test_data = json.load(f)
    
test_images = test_data['images']
test_annotations = test_data['annotations']

id2desc = {}

for img_desc in tqdm(test_images):
    id2desc[img_desc['id']] = img_desc['desc'][0]

for anno in tqdm(test_annotations):
    anno['desc'] = id2desc[anno['image_id']]

  0%|          | 0/558 [00:00<?, ?it/s]

  0%|          | 0/5015 [00:00<?, ?it/s]

In [45]:
with open('./data/test/evjvqa_public_test-lang-qtype-desc-answer.json', 'w', encoding='utf-8') as f:
    json.dump(test_data, f, indent=4, ensure_ascii=False)

In [46]:
with open('./data/private-test/evjvqa_private_test-desc.json', 'r', encoding='utf-8') as f:
    ptest_data = json.load(f)
    
ptest_images = ptest_data['images']
ptest_annotations = ptest_data['annotations']

id2desc = {}

for img_desc in tqdm(ptest_images):
    id2desc[img_desc['id']] = img_desc['desc'][0]

for anno in tqdm(ptest_annotations):
    anno['desc'] = id2desc[anno['image_id']]

  0%|          | 0/588 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

In [47]:
with open('./data/private-test/evjvqa_private_test-desc.json', 'w', encoding='utf-8') as f:
    json.dump(ptest_data, f, indent=4, ensure_ascii=False)