In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import tensorflow as tf

# allow_growth asks TensorFlow to claim GPU memory incrementally instead of
# reserving the whole device as soon as the session is created.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=config)

In [3]:
import qa_consistency
import qa_consistency.dataset_utils
import qa_consistency.implication
import json
import pickle
import os



# Example: generating implications

In [4]:
# Build the VQA implication generator. Later cells call it on a single
# (question, answer) pair and on the whole parsed dataset.
gen = qa_consistency.implication.ImplicationsVQA()

Did not use initialization regex that was passed: .*bias_ih.*
Did not use initialization regex that was passed: .*weight_hh.*
Did not use initialization regex that was passed: .*bias_hh.*
Did not use initialization regex that was passed: .*weight_ih.*


In [5]:
# Generate implications for one (question, answer) pair. As the output below
# shows, each result is a 3-tuple: implied question, implied answer, and a tag
# that appears to name the rule that produced it.
gen.implications('How many birds?', '3')

Your label namespace was 'pos'. We recommend you use a namespace ending with 'labels' or 'tags', so we don't add UNK and PAD tokens by default to your vocabulary.  See documentation for `non_padded_namespaces` parameter in Vocabulary.


[('Are there 3 birds ?', 'yes', 'yeseqcount'),
 ('Are there 4 birds ?', 'no', 'n+1'),
 ('Are there any birds ?', 'yes', 'ans>0 implies some')]

This path must point to a directory containing all of the VQA JSON files.

In [6]:
# Directory holding the VQA JSON files. Set the VQA_PATH environment variable
# to point at your own copy; the original location is kept as the fallback so
# the notebook behaves exactly as before when the variable is unset.
vqa_path = os.environ.get('VQA_PATH', '/home/marcotcr/datasets/vqa')


In [7]:
# Load VQA v1 with the 'validation' split selector. Later cells read
# vqa_v1.questions and vqa_v1.all_answers, and pass vqa_v1 itself to
# generate_implication_vqa.
vqa_v1 = qa_consistency.dataset_utils.load_vqa(vqa_path, 'validation')

# Generating implications for all VQA v1 and v2 (question, answer) pairs. You can skip this and load my precomputed implications below.

In [8]:
# Load VQA v2 with the 'validation' split selector, from the same directory as
# v1. Its questions and answers are merged with v1's in the next cell.
vqa_v2 = qa_consistency.dataset_utils.load_vqav2(vqa_path, 'validation')

In [9]:
# Pool the questions and the answers of both VQA versions, then let
# question_answers_product build the aligned question / answer lists that
# parse_dataset consumes.
all_qs, all_as = qa_consistency.dataset_utils.question_answers_product(
    vqa_v1.questions + vqa_v2.questions,
    vqa_v1.all_answers + vqa_v2.all_answers,
)

In [10]:
# Parse every (question, answer) pair. This is slow: in the recorded run below
# the "Const parse" stage took about 54 minutes and the "Dep parse" stage about
# 24 minutes. verbose=True produces the progress bars.
parsed_qas = gen.parse_dataset(all_qs, all_as, verbose=True)

  0%|          | 0/81565 [00:00<?, ?it/s]

Const parse questions


100%|██████████| 81565/81565 [53:54<00:00, 25.22it/s]
  0%|          | 0/815.65 [00:00<?, ?it/s]

Dep parse questions


Encountered the arc_loss key in the model's return dictionary which couldn't be split by the batch size. Key will be ignored.
Encountered the tag_loss key in the model's return dictionary which couldn't be split by the batch size. Key will be ignored.
Encountered the loss key in the model's return dictionary which couldn't be split by the batch size. Key will be ignored.
816it [23:38,  1.14it/s]                            


In [11]:
# One implications_from_parsed result per parsed item, in the same order as
# parsed_qas (the two lists are zipped together when building all_imps).
implications = list(map(gen.implications_from_parsed, parsed_qas))

In [48]:
# vqa_v1.idxs

In [None]:
# Folder where generated files (implications pickle, consistency files, fake
# predictions) are written. Override it with the QA_CONSISTENCY_OUTPUT
# environment variable; the original location is kept as the fallback.
output_folder = os.environ.get('QA_CONSISTENCY_OUTPUT', '/home/marcotcr/tmp/')

In [24]:
# Map each parsed (question, answer) pair, keyed by its tuple form, to the
# implications generated for it.
all_imps = {qa.as_tuple(): imp for qa, imp in zip(parsed_qas, implications)}

# Use a context manager so the pickle is flushed and the handle closed before
# any later cell (or another process) reads the file back.
with open(os.path.join(output_folder, 'vqa_imps.pkl'), 'wb') as imps_file:
    pickle.dump(all_imps, imps_file)

# Start from here if you want to use precomputed implications (link to pkl file in the repository's README)

In [27]:
# Folder containing vqa_imps.pkl and receiving the generated consistency files.
# Override it with the QA_CONSISTENCY_OUTPUT environment variable; the original
# location is kept as the fallback.
output_folder = os.environ.get('QA_CONSISTENCY_OUTPUT', '/home/marcotcr/tmp/')

# NOTE: unpickling can execute arbitrary code -- only load a vqa_imps.pkl that
# you generated yourself or obtained from a source you trust.
with open(os.path.join(output_folder, 'vqa_imps.pkl'), 'rb') as imps_file:
    all_imps = pickle.load(imps_file)

# Sub-folder where the implication questions/annotations will be written.
consistency_folder = os.path.join(output_folder, 'vqa_v1_consistency')

Point to your model's original predictions, saved in the official VQA format:

In [40]:
# Path to the predictions file for the original questions; it is passed to
# generate_implication_vqa in the next cell.
preds_path = os.path.join(output_folder, 'orig_preds.json')

In [48]:
# Create the output folder up front instead of relying on it already existing;
# questions.json and annotations.json are written into it (see output below).
os.makedirs(consistency_folder, exist_ok=True)
qa_consistency.dataset_utils.generate_implication_vqa(vqa_v1, preds_path, all_imps, consistency_folder)

Writing:
/home/marcotcr/tmp/vqa_v1_consistency/questions.json
/home/marcotcr/tmp/vqa_v1_consistency/annotations.json


Now you would have to run your model on the generated files. Let's create a fake output in the right format just for simulation:

In [61]:
# Collect the ids of the generated implication questions. The context manager
# closes questions.json as soon as it has been parsed.
with open(os.path.join(consistency_folder, 'questions.json'), 'r') as questions_file:
    question_ids = [x['question_id'] for x in json.load(questions_file)['questions']]

In [67]:
# Where the simulated predictions for the implication questions are written;
# evaluate_consistency_vqa reads this file afterwards.
fake_preds_path = os.path.join(output_folder, 'consistency_preds.json')

In [64]:
# Simulate a model by answering every implication question with a random
# 'yes' / 'no' (unseeded, so the numbers change from run to run).
fake_answers = np.random.choice(['yes', 'no'], len(question_ids))
fake_preds = [{'question_id': q, 'answer': a} for q, a in zip(question_ids, fake_answers)]

# Close the file explicitly so the JSON is fully flushed to disk before the
# evaluation cell reads it back.
with open(fake_preds_path, 'w') as preds_file:
    json.dump(fake_preds, preds_file)

In [70]:
# Score the (fake) predictions against the generated implication files.
# `stats` maps each implication type to its consistency value, plus an 'all'
# entry for the overall figure; values are scaled by 100 for display.
stats = qa_consistency.dataset_utils.evaluate_consistency_vqa(consistency_folder, fake_preds_path)

print('Consistency by implication type:')
print()
for imp_type, score in stats.items():
    # The aggregate entry is reported separately at the end.
    if imp_type != 'all':
        print('%s : %.1f' % (imp_type, 100 * score))
print()
print('Avg  : %.1f' % (100 * stats['all']))

Consistency by implication type:

logeq : 50.3
necessary_condition : 49.7
mutex : 49.9

Avg  : 50.0
