Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
ictLiuFenglin committed Sep 5, 2018
1 parent 88cad88 commit 4d3a1f3
Showing 1 changed file with 54 additions and 0 deletions.
54 changes: 54 additions & 0 deletions KarpathySplit.py
@@ -0,0 +1,54 @@
# coding: utf-8

# Karpathy Split for MS-COCO Dataset
import json
from random import shuffle, seed

# Fix the RNG seed so the shuffle below, and therefore the resulting split,
# is the same on every run.
seed( 123 )

# Number of images held out for the validation and test subsets; every
# remaining image goes to the training subset.
num_val = 5000
num_test = 5000

# Load the two caption annotation files. The `with` blocks close each file
# as soon as it has been parsed instead of leaking the handle.
with open('./data/annotations/captions_val2014.json', 'r') as val_file:
    val = json.load( val_file )
with open('./data/annotations/captions_train2014.json', 'r') as train_file:
    train = json.load( train_file )

# Merge together
imgs = val['images'] + train['images']
annots = val['annotations'] + train['annotations']

# Shuffle the pooled image records in place (deterministic because of the
# seed above).
# NOTE(review): the subsets are produced by this seeded random shuffle, not
# read from a published image list -- confirm this is the intended
# "Karpathy split".
shuffle( imgs )

# Split into val, test, train
dataset = {}
dataset[ 'val' ] = imgs[ :num_val ]
dataset[ 'test' ] = imgs[ num_val: num_val + num_test ]
dataset[ 'train' ] = imgs[ num_val + num_test: ]

# Group by image ids: image_id -> list of that image's annotation records.
itoa = {}
for a in annots:
    itoa.setdefault( a['image_id'], [] ).append( a )


json_data = {}
# The 'info' and 'licenses' sections are copied from the train file into
# every output file.
info = train['info']
licenses = train['licenses']

# Order in which the subsets are built and written.
split = [ 'test', 'val', 'train' ]

for subset in split:

    json_data[ subset ] = { 'type':'caption', 'info':info, 'licenses': licenses,
                            'images':[], 'annotations':[] }

    for img in dataset[ subset ]:

        img_id = img['id']
        # An image without any annotation entry contributes no annotations
        # rather than aborting the whole run with a KeyError.
        anns = itoa.get( img_id, [] )

        json_data[ subset ]['images'].append( img )
        json_data[ subset ]['annotations'].extend( anns )

    # One output file per subset; the `with` block guarantees the data is
    # flushed and the file closed before the next subset is processed.
    out_path = './data/annotations/karpathy_split_' + subset + '.json'
    with open( out_path, 'w' ) as out_file:
        json.dump( json_data[ subset ], out_file )

0 comments on commit 4d3a1f3

Please sign in to comment.