forked from ruotianluo/ImageCaptioning.pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
msvd2coco.py
99 lines (81 loc) · 3.11 KB
/
msvd2coco.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cPickle
import json
import random
def read(seed=None):
    """Convert the raw MSVD caption pickles into COCO-style JSON files.

    Reads:
      * data/msvd_raw/vid2name.pkl -- dict mapping int video id -> video name
      * data/msvd_raw/cap.pkl      -- dict mapping 'vid<N>' -> list of caption
                                      dicts with 'caption' and 'tokenized' keys

    Writes:
      * data/msvd_dataset/dataset_msvd.json  -- Karpathy-style dataset file
      * data/msvd_dataset/coco_ref_msvd.json -- COCO caption-reference file
                                                holding TEST-split captions only

    Args:
        seed: optional int. When given, seeds the RNG so the random
            train/val/test assignment is reproducible. The default (None)
            preserves the original non-deterministic behavior.
    """
    # cPickle exists only on Python 2; fall back to Py3's pickle module so the
    # script runs on both interpreters.
    try:
        import cPickle as pickle_mod
    except ImportError:
        import pickle as pickle_mod

    # Binary mode is required for pickle files on Python 3 (harmless on Py2);
    # the original text-mode open breaks under Py3.
    with open('data/msvd_raw/vid2name.pkl', 'rb') as file:
        vid2name = pickle_mod.load(file)  # vid2name: int -> str
    n_video = len(vid2name)
    print('> total video count (n_video): {}'.format(n_video))

    if seed is not None:
        random.seed(seed)
    # Materialize the keys: a Py3 dict view cannot be shuffled in place.
    vids = list(vid2name.keys())
    random.shuffle(vids)

    # Traditional MSVD split: 1200 train | 100 val | 670 test.
    # Build one fresh dict rather than aliasing/mutating the train dict.
    split_dict = {}
    split_dict.update({vid: 'train' for vid in vids[0:1200]})
    split_dict.update({vid: 'val' for vid in vids[1200:1300]})
    split_dict.update({vid: 'test' for vid in vids[1300:]})

    # cap.pkl is keyed 'vid0', 'vid1', ...; each value is a list of
    # {'cap_id', 'tokenized' (train), 'caption' (eval), 'image_id'} dicts.
    with open('data/msvd_raw/cap.pkl', 'rb') as file:
        caps = pickle_mod.load(file)

    # Karpathy-style per-video records; dumped as dataset_msvd.json below.
    images = []
    for vid in range(n_video):
        images.append({
            'imgid': vid,
            'cocoid': vid,
            'sentences': [],
            'sentids': [],
            'split': split_dict[vid],
        })

    # COCO-reference structures; only TEST-set captions are included.
    annotations = []
    igs = []
    sentence_id = 0
    for vid in range(n_video):
        is_test = images[vid]['split'] == 'test'
        if is_test:
            # One image entry per test video. (The original appended an entry
            # per *caption*, duplicating image ids in the reference file.)
            igs.append({'id': vid})
        for cap in caps['vid%s' % vid]:
            sent_dict = {}
            sent_dict['imgid'] = vid
            sent_dict['raw'] = cap['caption']
            sent_dict['tokens'] = cap['tokenized'].split(' ')
            sent_dict['sentid'] = sentence_id
            if is_test:
                annotations.append({
                    'caption': cap['caption'],
                    'id': sentence_id,
                    'image_id': vid,
                })
            images[vid]['sentences'].append(sent_dict)
            images[vid]['sentids'].append(sentence_id)
            sentence_id += 1

    # Dump dataset_msvd.json.
    dataset_msvd = {'images': images}
    with open('data/msvd_dataset/dataset_msvd.json', 'w') as file:
        json.dump(dataset_msvd, file)
    print('> wrote dataset_msvd.json')
    print('>', dataset_msvd['images'][0])

    # Dump coco_ref_msvd.json in the COCO caption-reference layout.
    coco_ref_msvd = {}
    coco_ref_msvd['annotations'] = annotations
    coco_ref_msvd['images'] = igs
    coco_ref_msvd['type'] = 'captions'
    coco_ref_msvd['info'] = 'msvd'
    coco_ref_msvd['licenses'] = 'Ke Su'
    with open('data/msvd_dataset/coco_ref_msvd.json', 'w') as file:
        json.dump(coco_ref_msvd, file)
    print('> wrote coco_ref_msvd.json')
    print('>', coco_ref_msvd['annotations'][:4])
# Script entry point: build the MSVD COCO-style JSON files from the raw pickles.
if __name__ == '__main__':
    read()