# Preprocess VQA 2.0

In [1]:
import json

from collections import Counter

In [2]:
train_anno_path = "/ceph/kien/data2.0/v2_mscoco_train2014_annotations.json"
val_anno_path = "/ceph/kien/data2.0/v2_mscoco_val2014_annotations.json"

train_ques_path = "/ceph/kien/data2.0/v2_OpenEnded_mscoco_train2014_questions.json"
val_ques_path = "/ceph/kien/data2.0/v2_OpenEnded_mscoco_val2014_questions.json"
testdev_ques_path = "/ceph/kien/data2.0/v2_OpenEnded_mscoco_test-dev2015_questions.json"
test_ques_path = "/ceph/kien/data2.0/v2_OpenEnded_mscoco_test2015_questions.json"

In [3]:
def num_to_score(num):
    if num > 3:
        return 1
    elif num == 3:
        return 0.9
    elif num == 2:
        return 0.6
    elif num == 1:
        return 0.3
    elif num == 0:
        return 0
    else:
        raise TypeError("Wrong type of number!")

In [4]:
def save_dataset(subtype, ques_path, anno_path=None):
    dataset = []
    imdir = "/ceph/kien/data2.0/%s/COCO_%s_%012d.jpg"
    ques_data = json.load(open(ques_path, "r"))
    anno_data = json.load(open(anno_path, "r")) if anno_path is not None else None
    
    for i in range(len(ques_data["questions"])):
        ques = ques_data["questions"][i]["question"]
        ques_id = ques_data["questions"][i]["question_id"]
        img_id = ques_data["questions"][i]["image_id"]
        image_path = imdir%(subtype, subtype, img_id)
        
        item = {"ques_id": [ques_id], "img_path": image_path, "ques": [ques], "id": img_id}
        
        if anno_path is not None:
            mc_ans = anno_data["annotations"][i]["multiple_choice_answer"]
            answers = Counter()
            for ans in anno_data["annotations"][i]["answers"]:
                answers.update([ans["answer"]])
            answers = [(ans, num_to_score(num)) for ans, num in answers.items()]
            
            assert img_id == anno_data["annotations"][i]["image_id"], "Image index doesn't match!"
            assert ques_id == anno_data["annotations"][i]["question_id"], "Question index doesn't match!"
            
            item["mc_ans"] = [mc_ans]
            item["ans"] = [answers]
        dataset.append(item)
        if (i+1) % 1000 == 0:
            print("processing %i/%i" % (i, len(ques_data["questions"])))
        
    return dataset

In [5]:
trainset = save_dataset("train2014", train_ques_path, train_anno_path)

processing 999/443757
processing 1999/443757
processing 2999/443757
processing 3999/443757
processing 4999/443757
processing 5999/443757
processing 6999/443757
processing 7999/443757
processing 8999/443757
processing 9999/443757
processing 10999/443757
processing 11999/443757
processing 12999/443757
processing 13999/443757
processing 14999/443757
processing 15999/443757
processing 16999/443757
processing 17999/443757
processing 18999/443757
processing 19999/443757
processing 20999/443757
processing 21999/443757
processing 22999/443757
processing 23999/443757
processing 24999/443757
processing 25999/443757
processing 26999/443757
processing 27999/443757
processing 28999/443757
processing 29999/443757
processing 30999/443757
processing 31999/443757
processing 32999/443757
processing 33999/443757
processing 34999/443757
processing 35999/443757
processing 36999/443757
processing 37999/443757
processing 38999/443757
processing 39999/443757
processing 40999/443757
processing 41999/443757
pro

In [6]:
print(len(trainset))

num_samples = 5
for i in range(num_samples):
    print(trainset[i])

443757
{'ques_id': [458752000], 'img_path': '/ceph/kien/data2.0/train2014/COCO_train2014_000000458752.jpg', 'ques': ['What is this photo taken looking through?'], 'id': 458752, 'mc_ans': ['net'], 'ans': [[('net', 1), ('netting', 0.3), ('mesh', 0.3)]]}
{'ques_id': [458752001], 'img_path': '/ceph/kien/data2.0/train2014/COCO_train2014_000000458752.jpg', 'ques': ['What position is this man playing?'], 'id': 458752, 'mc_ans': ['pitcher'], 'ans': [[('pitcher', 1), ('catcher', 0.3)]]}
{'ques_id': [458752002], 'img_path': '/ceph/kien/data2.0/train2014/COCO_train2014_000000458752.jpg', 'ques': ['What color is the players shirt?'], 'id': 458752, 'mc_ans': ['orange'], 'ans': [[('orange', 1)]]}
{'ques_id': [458752003], 'img_path': '/ceph/kien/data2.0/train2014/COCO_train2014_000000458752.jpg', 'ques': ['Is this man a professional baseball player?'], 'id': 458752, 'mc_ans': ['yes'], 'ans': [[('yes', 1), ('no', 0.3)]]}
{'ques_id': [262146000], 'img_path': '/ceph/kien/data2.0/train2014/COCO_train2014

In [7]:
train_path = "/ceph/kien/VQA/dataset/mscoco_train.json"

with open(train_path, "w") as f:
    json.dump(trainset, f)

In [8]:
valset = save_dataset("val2014", val_ques_path, val_anno_path)

processing 999/214354
processing 1999/214354
processing 2999/214354
processing 3999/214354
processing 4999/214354
processing 5999/214354
processing 6999/214354
processing 7999/214354
processing 8999/214354
processing 9999/214354
processing 10999/214354
processing 11999/214354
processing 12999/214354
processing 13999/214354
processing 14999/214354
processing 15999/214354
processing 16999/214354
processing 17999/214354
processing 18999/214354
processing 19999/214354
processing 20999/214354
processing 21999/214354
processing 22999/214354
processing 23999/214354
processing 24999/214354
processing 25999/214354
processing 26999/214354
processing 27999/214354
processing 28999/214354
processing 29999/214354
processing 30999/214354
processing 31999/214354
processing 32999/214354
processing 33999/214354
processing 34999/214354
processing 35999/214354
processing 36999/214354
processing 37999/214354
processing 38999/214354
processing 39999/214354
processing 40999/214354
processing 41999/214354
pro

In [9]:
print(len(valset))

num_samples = 5
for i in range(num_samples):
    print(valset[i])

214354
{'ques_id': [262148000], 'img_path': '/ceph/kien/data2.0/val2014/COCO_val2014_000000262148.jpg', 'ques': ['Where is he looking?'], 'id': 262148, 'mc_ans': ['down'], 'ans': [[('down', 1), ('at table', 0.3), ('skateboard', 0.3), ('table', 0.3)]]}
{'ques_id': [262148001], 'img_path': '/ceph/kien/data2.0/val2014/COCO_val2014_000000262148.jpg', 'ques': ['What are the people in the background doing?'], 'id': 262148, 'mc_ans': ['watching'], 'ans': [[('spectating', 0.3), ('watching', 1)]]}
{'ques_id': [262148002], 'img_path': '/ceph/kien/data2.0/val2014/COCO_val2014_000000262148.jpg', 'ques': ['What is he on top of?'], 'id': 262148, 'mc_ans': ['picnic table'], 'ans': [[('table', 0.9), ('picnic table', 1), ('skateboard', 0.3)]]}
{'ques_id': [393225000], 'img_path': '/ceph/kien/data2.0/val2014/COCO_val2014_000000393225.jpg', 'ques': ['What website copyrighted the picture?'], 'id': 393225, 'mc_ans': ['foodiebakercom'], 'ans': [[('foodiebakercom', 1), ('foodiebaker', 0.6), ('http://foodieba

In [10]:
val_path = "/ceph/kien/VQA/dataset/mscoco_val.json"

with open(val_path, "w") as f:
    json.dump(valset, f)

In [11]:
trainval_path = "/ceph/kien/VQA/dataset/mscoco_trainval.json"

trainvalset = trainset + valset
with open(trainval_path, "w") as f:
    json.dump(trainvalset, f)

In [12]:
testdevset = save_dataset("test2015", testdev_ques_path)

processing 999/107394
processing 1999/107394
processing 2999/107394
processing 3999/107394
processing 4999/107394
processing 5999/107394
processing 6999/107394
processing 7999/107394
processing 8999/107394
processing 9999/107394
processing 10999/107394
processing 11999/107394
processing 12999/107394
processing 13999/107394
processing 14999/107394
processing 15999/107394
processing 16999/107394
processing 17999/107394
processing 18999/107394
processing 19999/107394
processing 20999/107394
processing 21999/107394
processing 22999/107394
processing 23999/107394
processing 24999/107394
processing 25999/107394
processing 26999/107394
processing 27999/107394
processing 28999/107394
processing 29999/107394
processing 30999/107394
processing 31999/107394
processing 32999/107394
processing 33999/107394
processing 34999/107394
processing 35999/107394
processing 36999/107394
processing 37999/107394
processing 38999/107394
processing 39999/107394
processing 40999/107394
processing 41999/107394
pro

In [13]:
print(len(testdevset))

num_samples = 5
for i in range(num_samples):
    print(testdevset[i])

107394
{'ques_id': [262144005], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000262144.jpg', 'ques': ['What credit card company is on the banner in the background?'], 'id': 262144}
{'ques_id': [262144003], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000262144.jpg', 'ques': ['Is the pitcher wearing a hat?'], 'id': 262144}
{'ques_id': [262144000], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000262144.jpg', 'ques': ['Is the ball flying towards the batter?'], 'id': 262144}
{'ques_id': [524289001], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000524289.jpg', 'ques': ['Are the horses playing a game?'], 'id': 524289}
{'ques_id': [524289002], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000524289.jpg', 'ques': ['What is the color of water in the image?'], 'id': 524289}


In [14]:
testdev_path = "/ceph/kien/VQA/dataset/mscoco_testdev.json"

with open(testdev_path, "w") as f:
    json.dump(testdevset, f)

In [15]:
testset = save_dataset("test2015", test_ques_path)

processing 999/447793
processing 1999/447793
processing 2999/447793
processing 3999/447793
processing 4999/447793
processing 5999/447793
processing 6999/447793
processing 7999/447793
processing 8999/447793
processing 9999/447793
processing 10999/447793
processing 11999/447793
processing 12999/447793
processing 13999/447793
processing 14999/447793
processing 15999/447793
processing 16999/447793
processing 17999/447793
processing 18999/447793
processing 19999/447793
processing 20999/447793
processing 21999/447793
processing 22999/447793
processing 23999/447793
processing 24999/447793
processing 25999/447793
processing 26999/447793
processing 27999/447793
processing 28999/447793
processing 29999/447793
processing 30999/447793
processing 31999/447793
processing 32999/447793
processing 33999/447793
processing 34999/447793
processing 35999/447793
processing 36999/447793
processing 37999/447793
processing 38999/447793
processing 39999/447793
processing 40999/447793
processing 41999/447793
pro

In [16]:
print(len(testset))

num_samples = 5
for i in range(num_samples):
    print(testset[i])

447793
{'ques_id': [262144000], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000262144.jpg', 'ques': ['Is the ball flying towards the batter?'], 'id': 262144}
{'ques_id': [262144001], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000262144.jpg', 'ques': ['What sport is this?'], 'id': 262144}
{'ques_id': [262144002], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000262144.jpg', 'ques': ['Can you see the ball?'], 'id': 262144}
{'ques_id': [262144003], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000262144.jpg', 'ques': ['Is the pitcher wearing a hat?'], 'id': 262144}
{'ques_id': [262144004], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000262144.jpg', 'ques': ['Will he catch the ball in time?'], 'id': 262144}


In [17]:
test_path = "/ceph/kien/VQA/dataset/mscoco_test.json"

with open(test_path, "w") as f:
    json.dump(testset, f)

# Preprocess VQA 1.0

In [18]:
import json

from collections import Counter

In [19]:
train_anno_path = "/ceph/kien/data1.0/mscoco_train2014_annotations.json"
val_anno_path = "/ceph/kien/data1.0/mscoco_val2014_annotations.json"

train_ques_path = "/ceph/kien/data1.0/OpenEnded_mscoco_train2014_questions.json"
val_ques_path = "/ceph/kien/data1.0/OpenEnded_mscoco_val2014_questions.json"
testdev_ques_path = "/ceph/kien/data1.0/OpenEnded_mscoco_test-dev2015_questions.json"
test_ques_path = "/ceph/kien/data1.0/OpenEnded_mscoco_test2015_questions.json"

trainmc_ques_path = "/ceph/kien/data1.0/MultipleChoice_mscoco_train2014_questions.json"
valmc_ques_path = "/ceph/kien/data1.0/MultipleChoice_mscoco_val2014_questions.json"
testdevmc_ques_path = "/ceph/kien/data1.0/MultipleChoice_mscoco_test-dev2015_questions.json"
testmc_ques_path = "/ceph/kien/data1.0/MultipleChoice_mscoco_test2015_questions.json"

In [20]:
def num_to_score(num):
    if num > 3:
        return 1
    elif num == 3:
        return 0.9
    elif num == 2:
        return 0.6
    elif num == 1:
        return 0.3
    elif num == 0:
        return 0
    else:
        raise TypeError("Wrong type of number!")

In [21]:
def save_dataset(subtype, ques_path, mc_path, anno_path=None):
    dataset = []
    imdir = "/ceph/kien/data2.0/%s/COCO_%s_%012d.jpg"
    ques_data = json.load(open(ques_path, "r"))
    mc_data = json.load(open(mc_path, "r"))
    anno_data = json.load(open(anno_path, "r")) if anno_path is not None else None
    
    for i in range(len(ques_data["questions"])):
        ques = ques_data["questions"][i]["question"]
        ques_id = ques_data["questions"][i]["question_id"]
        mc = mc_data["questions"][i]["multiple_choices"]
        img_id = ques_data["questions"][i]["image_id"]
        image_path = imdir%(subtype, subtype, img_id)
        
        item = {"ques_id": [ques_id], "img_path": image_path, "ques": [ques], "id": img_id, "mc": [mc]}
        
        if anno_path is not None:
            mc_ans = anno_data["annotations"][i]["multiple_choice_answer"]
            answers = Counter()
            for ans in anno_data["annotations"][i]["answers"]:
                answers.update([ans["answer"]])
            answers = [(ans, num_to_score(num)) for ans, num in answers.items()]
            
            assert img_id == anno_data["annotations"][i]["image_id"], "Image index doesn't match!"
            assert ques_id == anno_data["annotations"][i]["question_id"], "Question index doesn't match!"
            
            item["mc_ans"] = [mc_ans]
            item["ans"] = [answers]
        dataset.append(item)
        if (i+1) % 1000 == 0:
            print("processing %i/%i" % (i, len(ques_data["questions"])))
        
    return dataset

In [22]:
trainset = save_dataset("train2014", train_ques_path, trainmc_ques_path, train_anno_path)

processing 999/248349
processing 1999/248349
processing 2999/248349
processing 3999/248349
processing 4999/248349
processing 5999/248349
processing 6999/248349
processing 7999/248349
processing 8999/248349
processing 9999/248349
processing 10999/248349
processing 11999/248349
processing 12999/248349
processing 13999/248349
processing 14999/248349
processing 15999/248349
processing 16999/248349
processing 17999/248349
processing 18999/248349
processing 19999/248349
processing 20999/248349
processing 21999/248349
processing 22999/248349
processing 23999/248349
processing 24999/248349
processing 25999/248349
processing 26999/248349
processing 27999/248349
processing 28999/248349
processing 29999/248349
processing 30999/248349
processing 31999/248349
processing 32999/248349
processing 33999/248349
processing 34999/248349
processing 35999/248349
processing 36999/248349
processing 37999/248349
processing 38999/248349
processing 39999/248349
processing 40999/248349
processing 41999/248349
pro

In [23]:
print(len(trainset))

num_samples = 5
for i in range(num_samples):
    print(trainset[i])

248349
{'ques_id': [4870250], 'img_path': '/ceph/kien/data2.0/train2014/COCO_train2014_000000487025.jpg', 'ques': ['What shape is the bench seat?'], 'id': 487025, 'mc': [['square', '1', 'w 26th st', 'white', 'rectangular', '2', 'red', '3', 'blue', '4', 'yellow sign', 'curved', 'green', 'no', 'rectangle', 'croatia express', 'medal', 'yes']], 'mc_ans': ['curved'], 'ans': [[('oval', 0.3), ('semi circle', 0.3), ('curved', 1), ('double curve', 0.3), ('banana', 0.3), ('wavy', 0.3), ('twisting', 0.3)]]}
{'ques_id': [4870251], 'img_path': '/ceph/kien/data2.0/train2014/COCO_train2014_000000487025.jpg', 'ques': ['Is there a shadow?'], 'id': 487025, 'mc': [['north carolina', 'against wall', '1', '3', 'cookbook', 'audience', 'ppk', '2', 'elgin county', 'blue', 'desk', 'green', 'red', '4', 'adjusting woman oxygen', 'yes', 'no', 'white']], 'mc_ans': ['yes'], 'ans': [[('yes', 1)]]}
{'ques_id': [4870252], 'img_path': '/ceph/kien/data2.0/train2014/COCO_train2014_000000487025.jpg', 'ques': ['Is this one

In [24]:
train_path = "/ceph/kien/VQA/dataset/v1_mscoco_train.json"

with open(train_path, "w") as f:
    json.dump(trainset, f)

In [25]:
valset = save_dataset("val2014", val_ques_path, valmc_ques_path, val_anno_path)

processing 999/121512
processing 1999/121512
processing 2999/121512
processing 3999/121512
processing 4999/121512
processing 5999/121512
processing 6999/121512
processing 7999/121512
processing 8999/121512
processing 9999/121512
processing 10999/121512
processing 11999/121512
processing 12999/121512
processing 13999/121512
processing 14999/121512
processing 15999/121512
processing 16999/121512
processing 17999/121512
processing 18999/121512
processing 19999/121512
processing 20999/121512
processing 21999/121512
processing 22999/121512
processing 23999/121512
processing 24999/121512
processing 25999/121512
processing 26999/121512
processing 27999/121512
processing 28999/121512
processing 29999/121512
processing 30999/121512
processing 31999/121512
processing 32999/121512
processing 33999/121512
processing 34999/121512
processing 35999/121512
processing 36999/121512
processing 37999/121512
processing 38999/121512
processing 39999/121512
processing 40999/121512
processing 41999/121512
pro

In [26]:
print(len(valset))

num_samples = 5
for i in range(num_samples):
    print(valset[i])

121512
{'ques_id': [3506232], 'img_path': '/ceph/kien/data2.0/val2014/COCO_val2014_000000350623.jpg', 'ques': ['What is the table made of?'], 'id': 350623, 'mc': [['4', 'green', 'no', 'metal', '2', 'blue', 'plastic', 'marble', 'wood', 'white', 'red', 'concrete bricks', 'robe', '3', '1', 'yes', 'siam', 'white and black']], 'mc_ans': ['wood'], 'ans': [[('wood', 1)]]}
{'ques_id': [3506230], 'img_path': '/ceph/kien/data2.0/val2014/COCO_val2014_000000350623.jpg', 'ques': ['Is the food napping on the table?'], 'id': 350623, 'mc': [['tv is on', 'red', 'blue', '3', 'green', 'no', 'white', 'yes', '1', 'disney', 'on porch', 'mother and child', '2', 'slug bug', 'cartoons', '4', 'playing with dog', 'burger']], 'mc_ans': ['no'], 'ans': [[('no', 1), ('yes', 0.6)]]}
{'ques_id': [3506231], 'img_path': '/ceph/kien/data2.0/val2014/COCO_val2014_000000350623.jpg', 'ques': ['What has been upcycled to make lights?'], 'id': 350623, 'mc': [['3', 'build bear', 'no', 'blue', 'bulbs', 'green', '1', 'white', 'tin

In [27]:
val_path = "/ceph/kien/VQA/dataset/v1_mscoco_val.json"

with open(val_path, "w") as f:
    json.dump(valset, f)

In [28]:
trainval_path = "/ceph/kien/VQA/dataset/v1_mscoco_trainval.json"

trainvalset = trainset + valset
with open(trainval_path, "w") as f:
    json.dump(trainvalset, f)

In [29]:
testdevset = save_dataset("test2015", testdev_ques_path, testdevmc_ques_path)

processing 999/60864
processing 1999/60864
processing 2999/60864
processing 3999/60864
processing 4999/60864
processing 5999/60864
processing 6999/60864
processing 7999/60864
processing 8999/60864
processing 9999/60864
processing 10999/60864
processing 11999/60864
processing 12999/60864
processing 13999/60864
processing 14999/60864
processing 15999/60864
processing 16999/60864
processing 17999/60864
processing 18999/60864
processing 19999/60864
processing 20999/60864
processing 21999/60864
processing 22999/60864
processing 23999/60864
processing 24999/60864
processing 25999/60864
processing 26999/60864
processing 27999/60864
processing 28999/60864
processing 29999/60864
processing 30999/60864
processing 31999/60864
processing 32999/60864
processing 33999/60864
processing 34999/60864
processing 35999/60864
processing 36999/60864
processing 37999/60864
processing 38999/60864
processing 39999/60864
processing 40999/60864
processing 41999/60864
processing 42999/60864
processing 43999/60864

In [30]:
print(len(testdevset))

num_samples = 5
for i in range(num_samples):
    print(testdevset[i])

60864
{'ques_id': [4195880], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000419588.jpg', 'ques': ['Are the dogs tied?'], 'id': 419588, 'mc': [['1', 'bare', 'bacon hot dog beans', 'stumbling', '4', 'no', '3', 'black', 'yes', '2', 'ringling bros and barnum & bailey', 'quilted northern', 'junk', 'white', 'blue', 'hopefully', 'red', 'grass']]}
{'ques_id': [4195881], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000419588.jpg', 'ques': ['Is this a car show?'], 'id': 419588, 'mc': [['chickens', 'wedding', 'not', 'bowling alley', 'casa nos bairros', 'raincoat', '3', 'no', 'red', 'yes', 'blue', 'pc', 'black', '1', '2', 'on buffalo', 'white', '4']]}
{'ques_id': [4195882], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000419588.jpg', 'ques': ['Is there a lady sitting inside the red truck?'], 'id': 419588, 'mc': [['black', '3', 'southern', 'blueberries', 'blonde and blue', 'blue', 'in field', 'train station', 'no', '2', '4', 'n', 'yes', '1', 'for sauce', 'red

In [31]:
testdev_path = "/ceph/kien/VQA/dataset/v1_mscoco_testdev.json"

with open(testdev_path, "w") as f:
    json.dump(testdevset, f)

In [32]:
testset = save_dataset("test2015", test_ques_path, testmc_ques_path)

processing 999/244302
processing 1999/244302
processing 2999/244302
processing 3999/244302
processing 4999/244302
processing 5999/244302
processing 6999/244302
processing 7999/244302
processing 8999/244302
processing 9999/244302
processing 10999/244302
processing 11999/244302
processing 12999/244302
processing 13999/244302
processing 14999/244302
processing 15999/244302
processing 16999/244302
processing 17999/244302
processing 18999/244302
processing 19999/244302
processing 20999/244302
processing 21999/244302
processing 22999/244302
processing 23999/244302
processing 24999/244302
processing 25999/244302
processing 26999/244302
processing 27999/244302
processing 28999/244302
processing 29999/244302
processing 30999/244302
processing 31999/244302
processing 32999/244302
processing 33999/244302
processing 34999/244302
processing 35999/244302
processing 36999/244302
processing 37999/244302
processing 38999/244302
processing 39999/244302
processing 40999/244302
processing 41999/244302
pro

In [33]:
print(len(testset))

num_samples = 5
for i in range(num_samples):
    print(testset[i])

244302
{'ques_id': [4195880], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000419588.jpg', 'ques': ['Are the dogs tied?'], 'id': 419588, 'mc': [['1', 'bare', 'bacon hot dog beans', 'stumbling', '4', 'no', '3', 'black', 'yes', '2', 'ringling bros and barnum & bailey', 'quilted northern', 'junk', 'white', 'blue', 'hopefully', 'red', 'grass']]}
{'ques_id': [4195881], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000419588.jpg', 'ques': ['Is this a car show?'], 'id': 419588, 'mc': [['chickens', 'wedding', 'not', 'bowling alley', 'casa nos bairros', 'raincoat', '3', 'no', 'red', 'yes', 'blue', 'pc', 'black', '1', '2', 'on buffalo', 'white', '4']]}
{'ques_id': [4195882], 'img_path': '/ceph/kien/data2.0/test2015/COCO_test2015_000000419588.jpg', 'ques': ['Is there a lady sitting inside the red truck?'], 'id': 419588, 'mc': [['black', '3', 'southern', 'blueberries', 'blonde and blue', 'blue', 'in field', 'train station', 'no', '2', '4', 'n', 'yes', '1', 'for sauce', 're

In [34]:
test_path = "/ceph/kien/VQA/dataset/v1_mscoco_test.json"

with open(test_path, "w") as f:
    json.dump(testset, f)

# Load pre-trained GloVe vectors

In [8]:
import torch
import numpy as np

from collections import Counter
from torchtext import vocab
from torchtext.vocab import GloVe

In [9]:
gloves = GloVe(name="840B", dim="300")

In [10]:
c = Counter()
for word in gloves.itos:
    c.update([word])

print(len(c))
v = vocab.Vocab(c, min_freq=1, specials=["<_>", "<unk>"], vectors=gloves)

2196016


In [11]:
idx2word = v.itos
word2idx = v.stoi
vectors = v.vectors

In [23]:
print(len(idx2word))
print(len(word2idx.keys()))
print(len(vectors))

print(type(idx2word))
print(type(word2idx))
print(vectors.size())

2196018
2196018
2196018
<class 'list'>
<class 'collections.defaultdict'>
torch.Size([2196018, 300])


In [13]:
vocab_path = "/ceph/kien/VQA/dataset/glove_840B.pt"

info = {
    "idx2word": idx2word,
    "word2idx": word2idx,
    "vectors": vectors,
}
torch.save(info, vocab_path)

In [21]:
import pickle
with open("/ceph/kien/VisualQA/dataset/cocotrain_6_info.pkl", "rb") as file:
    word_dict = pickle.load(file)["word2idx"]

vecs = np.load("/ceph/kien/VisualQA/dataset/glove_840B.npz")["word2vec"]

In [29]:
sentence = ["is", "this", "old", "livingroom", "in", "the", "wrong", "side", "?", "<_>", "<unk>"]

for word in sentence:
    print("True") if np.allclose(vecs[word_dict[word]], vectors[word2idx[word]].numpy()) else print("False")

True
True
True
True
True
True
True
True
True
True
False
