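Adapted from chair.py for thesis experimentation. This notebook builds a mapping from image id to the MSCOCO objects found in that image's segmentation annotations and captions, and saves it to imid_to_objects.json.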

In [1]:
import json

# Load the COCO val2014 instance (segmentation) annotations.
# The file is decoded explicitly as UTF-8 so the result does not depend on the
# platform's default locale encoding.
with open('../data/coco/annotations/instances_val2014.json', 'r', encoding='utf-8') as f:
    coco_segments = json.load(f)

print("Dataset Loaded Successfully!")

Dataset Loaded Successfully!


In [2]:
# Load the COCO val2014 caption annotations.
# The file is decoded explicitly as UTF-8 so caption text is read the same way
# on every platform instead of using the default locale encoding.
with open('../data/coco/annotations/captions_val2014.json', 'r', encoding='utf-8') as f:
    coco_caps = json.load(f)

print("Dataset Loaded Successfully!")

Dataset Loaded Successfully!


In [3]:
# Read synonyms.txt into a list of raw lines (line endings are still attached;
# they are stripped when the lines are parsed).
with open('synonyms.txt', 'r') as synonyms_file:
    synonyms_txt = list(synonyms_file)

print("Synonyms Loaded Successfully!")

Synonyms Loaded Successfully!


In [4]:
# image id -> list of MSCOCO node words; populated by the annotation cells below
imid_to_objects = {}

# each non-blank line of synonyms.txt is one ', '-separated synonym group
synonyms = []
for raw_line in synonyms_txt:
    stripped = raw_line.strip()
    if stripped:
        synonyms.append(stripped.split(', '))

#mscoco objects and *all* synonyms
mscoco_objects = [term for group in synonyms for term in group]

# every term (including the first one itself) maps to the first entry of its group
inverse_synonym_dict = {term: group[0] for group in synonyms for term in group}

#Some hard coded rules for implementing CHAIR metrics on MSCOCO

#common 'double words' in MSCOCO that should be treated as a single word
coco_double_words = [
    'motor bike', 'motor cycle', 'air plane', 'traffic light', 'street light',
    'traffic signal', 'stop light', 'fire hydrant', 'stop sign', 'parking meter',
    'suit case', 'sports ball', 'baseball bat', 'baseball glove', 'tennis racket',
    'wine glass', 'hot dog', 'cell phone', 'mobile phone', 'teddy bear',
    'hair drier', 'potted plant', 'bow tie', 'laptop computer', 'stove top oven',
    'hot dog', 'teddy bear', 'home plate', 'train track',
]

#Hard code some rules for special cases in MSCOCO
#qualifiers like 'baby' or 'adult' animal will lead to a false fire for the MSCOCO object 'person'.  'baby bird' --> 'bird'.
animal_words = [
    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'animal', 'cub',
]
#qualifiers like 'passenger' vehicle will lead to a false fire for the MSCOCO object 'person'.  'passenger jet' --> 'jet'.
vehicle_words = ['jet', 'train']

#double_word_dict will map double words to the word they should be treated as in our analysis

#plain double words map to themselves
double_word_dict = {phrase: phrase for phrase in coco_double_words}

#qualified animals and vehicles collapse to the bare animal / vehicle word
for animal in animal_words:
    double_word_dict['baby %s' % animal] = animal
    double_word_dict['adult %s' % animal] = animal
double_word_dict.update(('passenger %s' % vehicle, vehicle) for vehicle in vehicle_words)

#explicit overrides and additions; 'bow tie' replaces the identity entry created above
#NOTE(review): the key 'wine glas' is kept exactly as written; presumably it targets a
#truncated singular form of 'glasses' -- confirm it is still needed with the lemmatizer in use
double_word_dict.update({
    'bow tie': 'tie',
    'toilet seat': 'toilet',
    'wine glas': 'wine glass',
})

In [8]:
segment_annotations = coco_segments['annotations']

#category id -> category name, taken from the 'categories' table of the instances file
id_to_name = {category['id']: category['name'] for category in coco_segments['categories']}

print("Getting annotations from segmentation masks...")
for annotation in segment_annotations:
    imid = annotation['image_id']

    #translate the annotated category name to its representative synonym-group word
    category_name = id_to_name[annotation['category_id']]
    node_word = inverse_synonym_dict[category_name]

    #start a new list the first time an image id is seen, then record the word
    imid_to_objects.setdefault(imid, []).append(node_word)

print("done!")

Getting annotations from segmentation masks...
done!


In [9]:
import nltk

# 'wordnet' backs the WordNetLemmatizer created below. The Punkt tokenizer data
# is required by nltk.word_tokenize, which caption_to_words calls; without it a
# fresh environment raises LookupError on the first caption. Both resource names
# are requested because the package is called 'punkt' in older NLTK releases and
# 'punkt_tab' in newer ones.
nltk.download('wordnet', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

def caption_to_words(caption):
    '''
    Extract the MSCOCO object mentions from a caption.

    Input: caption (string)
    Output: tuple (words, node_words, idxs, double_words)
        words:        tokens (after lemmatizing and double-word merging) that
                      appear in mscoco_objects
        node_words:   inverse_synonym_dict[word] for every entry of words
        idxs:         for every entry of words, the position in the lemmatized
                      token list at which that (possibly merged) token starts
        double_words: the full token list after double-word merging, before
                      any filtering
    '''

    #standard preprocessing
    tokens = nltk.word_tokenize(caption.lower())
    tokens = [lemmatizer.lemmatize(token, pos='n') for token in tokens]

    #replace double words
    i = 0
    double_words = []
    idxs = []
    while i < len(tokens):
        idxs.append(i)
        double_word = ' '.join(tokens[i:i+2])
        if double_word in double_word_dict:
            double_words.append(double_word_dict[double_word])
            i += 2
        else:
            double_words.append(tokens[i])
            i += 1

    #keep each merged token paired with its start index so that every filter
    #below removes the token and its index together (filtering the words alone
    #shifts later indices onto the wrong tokens)
    indexed_words = list(zip(idxs, double_words))

    #toilet seat is not chair (sentences like "the seat of the toilet" will fire for "chair" if we do not include this line)
    if 'toilet' in double_words and 'seat' in double_words:
        indexed_words = [(idx, word) for idx, word in indexed_words if word != 'seat']

    #get synonyms for all words in the caption
    #the lookup set is built once per call instead of once per token
    coco_vocabulary = set(mscoco_objects)
    indexed_words = [(idx, word) for idx, word in indexed_words if word in coco_vocabulary]

    idxs = [idx for idx, _ in indexed_words]
    words = [word for _, word in indexed_words]
    node_words = [inverse_synonym_dict[word] for word in words]

    #return all the MSCOCO objects in the caption
    return words, node_words, idxs, double_words

In [10]:
# all caption records from the captions file
caption_annotations = coco_caps['annotations']

# peek at the first record to see which fields a caption annotation carries
print(caption_annotations[0])

{'image_id': 203564, 'id': 37, 'caption': 'A bicycle replica with a clock as the front wheel.'}


In [11]:
for annotation in caption_annotations:
    imid = annotation['image_id']

    # only the synonym-normalised node words (second return value) are needed here
    node_words = caption_to_words(annotation['caption'])[1]

    # extend rather than replace: objects already recorded for this image
    # (e.g. by the segmentation-mask cell above) are kept
    imid_to_objects.setdefault(imid, []).extend(node_words)

yippee!!
yippee!!
yippee!!
yippee!!
yippee!!


In [None]:
# deduplicate
# sorted() is used instead of list(set(...)) so every image's object list has a
# deterministic order; the iteration order of a set of strings can change from
# one interpreter run to the next, which made the saved file non-reproducible
for imid in imid_to_objects:
    imid_to_objects[imid] = sorted(set(imid_to_objects[imid]))

In [None]:
# sanity check: is this particular image id present in the mapping?
spot_check_imid = 184613
print(spot_check_imid in imid_to_objects)

In [None]:
# save imid_to_objects
with open('imid_to_objects.json', 'w') as f:
    json.dump(imid_to_objects, f)
print("Saved imid_to_objects.json successfully!")