In [52]:
import os
import pickle
from nltk.translate import bleu_score
import numpy as np

# Load utils

In [5]:
from utils import load_vocab, decode_caption, load_caption, rrv_votes, load_annotations, print_image

vocab = load_vocab(dict_file = "../../outputs/vocab/5000/coco2014_vocab.json")
image_id_to_index, index_to_image_id, annotations_dict = load_annotations(annotations_dir="../../annotations/", 
                                                                          annotations_file='captions_val2014.json',
                                                                          map_file = "../../outputs/val_image_id_to_idx.csv")
print("Processed {} images".format(len(image_id_to_index)))
print("Processed {} images".format(len(annotations_dict.keys())))

annotations_list = [annotations_dict[image_id] for image_id in sorted(annotations_dict)]

idx_to_word
word_to_idx
Loaded dictionary...
Dictionary size: 5004
Error proccessing image_id: image_index
Skipping file person_keypoints_train2014.json
Skipping file instances_train2014.json
Skipping file instances_val2014.json
Skipping file person_keypoints_val2014.json
Processed 40504 images
Processed 40504 images


In [6]:
def load_best_beam_captions(beam_size):
    file_name = '../../outputs/voted_captions/{}/best_beam.pickle'.format(beam_size)
    with open(file_name, 'rb') as file:
        best_beams = pickle.load(file)
    return [decode_caption(best_beams[im_id], vocab) for im_id in sorted(best_beams)]

# Load all beams and extract best for each caption

In [16]:
def load_diverse_beam(diverse_beam_path):
    with open(diverse_beam_path, 'rb') as file:
        beams = pickle.load(file)
    return beams

def load_best_diverse_beams(beam_size):
    best_beams = {}
    beam_captions_dir = "../../outputs/diverse_beam/diverse_{}/".format(beam_size)
    for beam_path in os.listdir(beam_captions_dir):
        image_id = int(beam_path.split('.')[0])
        beam = load_diverse_beam(os.path.join(beam_captions_dir, beam_path))
        assert image_id == beam['image_id']
        best_captions = decode_caption(beam['captions'][0], vocab)
        best_beams[image_id] = best_captions
    return best_beams

In [36]:
diverse_captions = {}
beam_captions = {}
for k in [2, 10, 100]:
    diverse_captions[k] = load_best_diverse_beams(k)
    best_beam = load_best_beam_captions(k)
    beam_captions[k] = {i: best_beam[i] for i in diverse_best_beams.keys()}
    print("Loaded {} captions for beam size {}".format(len(diverse_captions[k]), k))

Loaded 40504 captions for beam size 2
Loaded 40504 captions for beam size 10
Loaded 40503 captions for beam size 100


## Compute Bleu score

In [34]:
def bleu1(best_beams):
    filtered_annotations = []
    captions = []
    for k in best_beams.keys():
        filtered_annotations.append(annotations_list[k])
        captions.append(best_beams[k])
    return bleu_score.corpus_bleu(filtered_annotations, captions, weights=[1.])

def bleu4(best_beams):
    filtered_annotations = []
    captions = []
    for k in best_beams.keys():
        filtered_annotations.append(annotations_list[k])
        captions.append(best_beams[k])
    return bleu_score.corpus_bleu(filtered_annotations, captions)

In [39]:
for k in [2, 10, 100]:
    diverse_bleu = bleu1(diverse_captions[k])
    beam_bleu = bleu1(beam_captions[k])
    print("k=", k)
    print("Diverse: ", diverse_bleu)
    print("Beam: ", beam_bleu)

k= 2
Diverse:  0.6790185372199954
Beam:  0.6797152717866048

k= 10
Diverse:  0.6724360325620435
Beam:  0.6723212808618904

k= 100
Diverse:  0.6642813357482044
Beam:  0.6617741577931905



In [37]:
bleu4(beam_captions[k])

0.2630932525817317

In [38]:
for k in [2, 10, 100]:
    diverse_bleu = bleu4(diverse_captions[k])
    beam_bleu = bleu4(beam_captions[k])
    print("k=", k)
    print("Diverse: ", diverse_bleu)
    print("Beam: ", beam_bleu)

k= 2
Diverse:  0.26680450416698176
Beam:  0.2682792451263592

k= 10
Diverse:  0.2692899477035207
Beam:  0.2716384032063896

k= 100
Diverse:  0.2637418486547894
Beam:  0.2630932525817317



# Unique n-grams

In [40]:
def num_unique_unigrams(captions):
    caption_strings = [" ".join(c) for c in captions]
    unique_unigrams = set()
    for caption in caption_strings:
        unigrams = caption.split()
        unique_unigrams |= set(unigrams)
    return len(unique_unigrams)

In [41]:
def num_unique_bigrams(captions):
    caption_strings = [" ".join(c) for c in captions]
    unique_bigrams = set()
    N = 0
    for caption in caption_strings:
        unigrams = caption.split()
        bigrams = list(zip(caption.split()[:-1], caption.split()[1:]))
        N += len(unigrams)
        unique_bigrams |= set(bigrams)
    return len(unique_bigrams)

In [45]:
list(diverse_captions[2].values())

[['a',
  'man',
  'riding',
  'a',
  'skateboard',
  'up',
  'the',
  'side',
  'of',
  'a',
  'ramp'],
 ['a', 'bowl', 'of', 'soup', 'sitting', 'on', 'a', 'table'],
 ['a', 'bus', 'is', 'parked', 'on', 'the', 'side', 'of', 'the', 'road'],
 ['a',
  'motorcycle',
  'parked',
  'in',
  'a',
  'field',
  'with',
  'a',
  'in',
  'the',
  'background'],
 ['a', 'bedroom', 'with', 'a', 'desk', 'and', 'a', 'bed'],
 ['a', 'man', 'is', 'holding', 'a', 'frisbee', 'in', 'his', 'hand'],
 ['a', 'young', 'boy', 'eating', 'a', 'banana', 'in', 'a'],
 ['a', 'man', 'is', 'standing', 'in', 'a', 'field', 'with', 'a', 'frisbee'],
 ['a',
  'man',
  'walking',
  'down',
  'a',
  'sidewalk',
  'next',
  'to',
  'a',
  'building'],
 ['a', 'bus', 'is', 'parked', 'in', 'a', 'tracks', 'lot'],
 ['a', 'dog', 'is', 'in', 'a'],
 ['a', 'of', 'luggage', 'sitting', 'in', 'a', 'room'],
 ['a', 'baseball', 'player', 'swinging', 'a', 'bat', 'at', 'a', 'ball'],
 ['a', 'sign'],
 ['a', 'group', 'of', 'people', 'sitting', 'around

In [46]:
for k in [2, 10, 100]:
    diverse_bleu = num_unique_unigrams(list(diverse_captions[k].values()))
    beam_bleu = num_unique_unigrams(list(beam_captions[k].values()))
    print("k=", k)
    print("Diverse: ", diverse_bleu)
    print("Beam: ", beam_bleu)

k= 2
Diverse:  672
Beam:  668
k= 10
Diverse:  646
Beam:  621
k= 100
Diverse:  612
Beam:  605


In [47]:
for k in [2, 10, 100]:
    diverse_bleu = num_unique_bigrams(list(diverse_captions[k].values()))
    beam_bleu = num_unique_bigrams(list(beam_captions[k].values()))
    print("k=", k)
    print("Diverse: ", diverse_bleu)
    print("Beam: ", beam_bleu)

k= 2
Diverse:  3402
Beam:  3395
k= 10
Diverse:  3023
Beam:  2778
k= 100
Diverse:  2561
Beam:  2479


# Unique captions

In [48]:
def num_unique(captions):
    caption_strings = [" ".join(c) for c in captions]
    return len(np.unique(caption_strings))

In [53]:
for k in [2, 10, 100]:
    diverse_bleu = num_unique(list(diverse_captions[k].values()))
    beam_bleu = num_unique(list(beam_captions[k].values()))
    print("k=", k)
    print("Diverse: ", diverse_bleu)
    print("Beam: ", beam_bleu)

k= 2
Diverse:  9443
Beam:  9208
k= 10
Diverse:  6424
Beam:  5488
k= 100
Diverse:  4403
Beam:  4150


# Caption length

In [54]:
def caption_length(captions):
    return np.mean([len(c) for c in captions])

In [55]:
for k in [2, 10, 100]:
    diverse_bleu = caption_length(list(diverse_captions[k].values()))
    beam_bleu = caption_length(list(beam_captions[k].values()))
    print("k=", k)
    print("Diverse: ", diverse_bleu)
    print("Beam: ", beam_bleu)

k= 2
Diverse:  8.710818684574363
Beam:  8.788169069721508
k= 10
Diverse:  9.122012640726842
Beam:  9.183562117321745
k= 100
Diverse:  9.15784015998815
Beam:  9.109668180920403
