In [2]:
from beam import BeamSearch
from functools import partial
from itertools import izip, repeat
import sys
import time
sys.path.append('../coco-caption')
sys.path.append('../show-attend-and-tell-tensorflow')
from core.utils import load_pickle
from pycocoevalcap.bleu.bleu import Bleu

## Load data

In [3]:
# Dataset split whose reference sentences are loaded at the end of this cell.
split = 'train'

# The vocabulary file is read from the train directory regardless of `split`.
word_to_idx = load_pickle('../show-attend-and-tell-tensorflow/data/train/word_to_idx.pkl')

# Candidate words: every vocabulary key except the three special tokens,
# plus '.'.  Under Python 2, keys() returns a new list, so the edits below
# do not modify word_to_idx itself.
words = word_to_idx.keys()
for special_token in ('<START>', '<END>', '<NULL>'):
    words.remove(special_token)
words += ['.']

# Reference sentences for the chosen split.
references = load_pickle(
    '../show-attend-and-tell-tensorflow/data/{0}/{0}.references.pkl'.format(split))

Loaded ../show-attend-and-tell-tensorflow/data/train/word_to_idx.pkl..
Loaded ../show-attend-and-tell-tensorflow/data/train/train.references.pkl..


## Score function

In [4]:
# Module-level BLEU scorer (constructed with n=4), reused by every score() call.
scorer = Bleu(n=4)


def score(hypotheses, reference_sequences):
    """
    Score every hypothesis against one shared set of reference sequences.

    Each key of ``hypotheses`` is paired with the same ``reference_sequences``
    object, and both mappings are handed to ``scorer.compute_score``.

    :param hypotheses: Dict of hypotheses to compute score.
    :param reference_sequences: List of ground truth sequences; any of them
        counts as correct.
    :return scores: The second element returned by ``scorer.compute_score``.
        NOTE(review): the earlier docstring described this as a dict keyed
        like ``hypotheses`` holding the best score among the references --
        confirm against the scorer implementation.
    """
    # Every hypothesis key maps to the very same reference list.
    references_by_key = {key: reference_sequences for key in hypotheses}
    _, per_hypothesis_scores = scorer.compute_score(references_by_key, hypotheses)
    return per_hypothesis_scores

## Beam Search
#### with reference actions
Reference words are as good as, or better than, non-reference words in terms of BLEU score. There are only a handful (~50) of reference words to consider, but potentially 10k+ non-reference words. So by considering only the reference words as the set of actions available to the expert, beam search is dramatically faster.
It takes < 1s per search with reference word actions.

In [5]:
start_time = time.time()
n_beams = 5
n_searches = 2
for counter, (_, reference) in enumerate(references.iteritems()):
    if counter >= n_searches:
        break

    print 'Ground truth:\n \t', '\n \t'.join(reference)
    score_wrapper = partial(score, reference_sequences=reference)
    reference_words = set()
    for sentence in reference:
        reference_words.update(sentence.split(' '))
    beam_search = BeamSearch(n_beams, score_wrapper, reference_words)
    initial_beams = [[sentence.split(' ')[0]] for sentence in reference]
    scores, sequence = beam_search.search(max_length=15, beams=initial_beams)

end_time = time.time()
print ' \n Elapsed time per search: {}s \n \n'.format(float(end_time - start_time) / n_searches)

AssertionError: 

#### with all actions
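Same search, but with the full vocabulary as the action set (for comparison: < 1s per search with reference word actions). The timing for this variant is not recorded here, since the cell below has not been run.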

In [None]:
start_time = time.time()
n_beams = 5
n_searches = 2
for counter, (_, reference) in enumerate(references.iteritems()):
    if counter >= n_searches:
        break

    print 'Ground truth:\n \t', '\n \t'.join(reference)
    score_wrapper = partial(score, reference_sequences=reference)
    beam_search = BeamSearch(n_beams, score_wrapper, words)
    initial_beams = [[sentence.split(' ')[0]] for sentence in reference]
    scores, sequence = beam_search.search(max_length=15, beams=initial_beams)

end_time = time.time()
print ' \n Elapsed time per search: {}s \n \n'.format(float(end_time - start_time) / n_searches)