In [2]:
import warnings
warnings.filterwarnings(action='once')

import torch
import torch.nn.functional as F
import numpy as np
import json
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import skimage.transform
import argparse
from scipy.misc import imread, imresize
from PIL import Image

import warnings
warnings.filterwarnings('ignore')



In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
def read_mat_file(matfile):
    import scipy.io as sio
    
    mat_contents = sio.loadmat(matfile)
    data = mat_contents["data"]
    return data[0,0][7]

def load_matfile(img):
    if len(img.shape)==2:
        img = img[:,:,np.newaxis]
        img = np.concatenate([img,img,img],axis=2)
    
    img = imresize(img,(256,256))
    img = img.transpose(2, 0, 1)
    img = img /255
    img = torch.FloatTensor(img).to(device)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([normalize])
    image = transform(img)  # (3, 256, 256)

    return image

In [5]:
def load_image(image_path):
    # Read image and process
    img = imread(image_path)
    if len(img.shape) == 2:
        img = img[:, :, np.newaxis]
        img = np.concatenate([img, img, img], axis=2)
    img = imresize(img, (256, 256))
    img = img.transpose(2, 0, 1)
    img = img / 255.
    img = torch.FloatTensor(img).to(device)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([normalize])
    image = transform(img)  # (3, 256, 256)
    return image

In [6]:
def caption_image_beam_search(encoder, decoder, image_path, word_map, beam_size=3,read_mat = False):
    """
    Reads an image and captions it with beam search.

    :param encoder: encoder model
    :param decoder: decoder model
    :param image_path: path to image
    :param word_map: word map
    :param beam_size: number of sequences to consider at each decode-step
    :return: caption, weights for visualization
    """

    k = beam_size
    vocab_size = len(word_map)

    # Read image and process
    if read_mat:
        image = load_matfile(image_path)
    else:
        image = load_image(image_path)
    """    
    img = imread(image_path)
    if len(img.shape) == 2:
        img = img[:, :, np.newaxis]
        img = np.concatenate([img, img, img], axis=2)
    img = imresize(img, (256, 256))
    img = img.transpose(2, 0, 1)
    img = img / 255.
    img = torch.FloatTensor(img).to(device)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([normalize])
    image = transform(img)  # (3, 256, 256)
    """

    # Encode
    image = image.unsqueeze(0)  # (1, 3, 256, 256)
    encoder_out = encoder(image)  # (1, enc_image_size, enc_image_size, encoder_dim)
    enc_image_size = encoder_out.size(1)
    encoder_dim = encoder_out.size(3)

    # Flatten encoding
    encoder_out = encoder_out.view(1, -1, encoder_dim)  # (1, num_pixels, encoder_dim)
    num_pixels = encoder_out.size(1)

    # We'll treat the problem as having a batch size of k
    encoder_out = encoder_out.expand(k, num_pixels, encoder_dim)  # (k, num_pixels, encoder_dim)

    # Tensor to store top k previous words at each step; now they're just <start>
    k_prev_words = torch.LongTensor([[word_map['<start>']]] * k).to(device)  # (k, 1)

    # Tensor to store top k sequences; now they're just <start>
    seqs = k_prev_words  # (k, 1)

    # Tensor to store top k sequences' scores; now they're just 0
    top_k_scores = torch.zeros(k, 1).to(device)  # (k, 1)

    # Tensor to store top k sequences' alphas; now they're just 1s
    seqs_alpha = torch.ones(k, 1, enc_image_size, enc_image_size).to(device)  # (k, 1, enc_image_size, enc_image_size)

    # Lists to store completed sequences, their alphas and scores
    complete_seqs = list()
    complete_seqs_alpha = list()
    complete_seqs_scores = list()

    # Start decoding
    step = 1
    h, c = decoder.init_hidden_state(encoder_out)

    # s is a number less than or equal to k, because sequences are removed from this process once they hit <end>
    while True:

        embeddings = decoder.embedding(k_prev_words).squeeze(1)  # (s, embed_dim)

        awe, alpha = decoder.attention(encoder_out, h)  # (s, encoder_dim), (s, num_pixels)

        alpha = alpha.view(-1, enc_image_size, enc_image_size)  # (s, enc_image_size, enc_image_size)

        gate = decoder.sigmoid(decoder.f_beta(h))  # gating scalar, (s, encoder_dim)
        awe = gate * awe

        h, c = decoder.decode_step(torch.cat([embeddings, awe], dim=1), (h, c))  # (s, decoder_dim)

        scores = decoder.fc(h)  # (s, vocab_size)
        scores = F.log_softmax(scores, dim=1)

        # Add
        scores = top_k_scores.expand_as(scores) + scores  # (s, vocab_size)

        # For the first step, all k points will have the same scores (since same k previous words, h, c)
        if step == 1:
            top_k_scores, top_k_words = scores[0].topk(k, 0, True, True)  # (s)
        else:
            # Unroll and find top scores, and their unrolled indices
            top_k_scores, top_k_words = scores.view(-1).topk(k, 0, True, True)  # (s)

        # Convert unrolled indices to actual indices of scores
        prev_word_inds = top_k_words / vocab_size  # (s)
        next_word_inds = top_k_words % vocab_size  # (s)

        # Add new words to sequences, alphas
        seqs = torch.cat([seqs[prev_word_inds], next_word_inds.unsqueeze(1)], dim=1)  # (s, step+1)
        seqs_alpha = torch.cat([seqs_alpha[prev_word_inds], alpha[prev_word_inds].unsqueeze(1)],
                               dim=1)  # (s, step+1, enc_image_size, enc_image_size)

        # Which sequences are incomplete (didn't reach <end>)?
        incomplete_inds = [ind for ind, next_word in enumerate(next_word_inds) if
                           next_word != word_map['<end>']]
        complete_inds = list(set(range(len(next_word_inds))) - set(incomplete_inds))

        # Set aside complete sequences
        if len(complete_inds) > 0:
            complete_seqs.extend(seqs[complete_inds].tolist())
            complete_seqs_alpha.extend(seqs_alpha[complete_inds].tolist())
            complete_seqs_scores.extend(top_k_scores[complete_inds])
        k -= len(complete_inds)  # reduce beam length accordingly

        # Proceed with incomplete sequences
        if k == 0:
            break
        seqs = seqs[incomplete_inds]
        seqs_alpha = seqs_alpha[incomplete_inds]
        h = h[prev_word_inds[incomplete_inds]]
        c = c[prev_word_inds[incomplete_inds]]
        encoder_out = encoder_out[prev_word_inds[incomplete_inds]]
        top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1)
        k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1)

        # Break if things have been going on too long
        if step > 50:
            break
        step += 1
    #print(len(complete_seqs_scores))
    if len(complete_seqs_scores) == 0:
        seq = [9489]
        alphas = [0]
    else:
        i = complete_seqs_scores.index(max(complete_seqs_scores))
        seq = complete_seqs[i]
        alphas = complete_seqs_alpha[i]

    return seq, alphas

In [7]:
#warnings.filterwarnings('ignore')
# Load model
checkpoint = torch.load("models/BEST_checkpoint_coco_5_cap_per_img_5_min_word_freq.pth.tar")
decoder = checkpoint['decoder']
decoder = decoder.to(device)
decoder.eval()
encoder = checkpoint['encoder']
encoder = encoder.to(device)
encoder.eval()
print("Loaded encoder and decoder!")

Loaded encoder and decoder!


In [8]:
# Load word map (word2ix)
with open("models/WORDMAP_coco_5_cap_per_img_5_min_word_freq.json", 'r') as j:
    word_map = json.load(j)
rev_word_map = {v: k for k, v in word_map.items()}  # ix2word

In [10]:
images_test = "/home/pramod/Downloads/datasets/image_caption/pascal_image_data/test/"
image_eg = "image_1.jpg"
seq, alphas = caption_image_beam_search(encoder, decoder, images_test+image_eg, word_map, beam_size=5)

In [12]:
words = [rev_word_map[ind] for ind in seq]
print(words)
print(len(words))

['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
11


In [13]:
import pandas as pd
import os

In [14]:
show_tell_attend_df  = pd.DataFrame()
count = 0

images = "/home/pramod/Downloads/datasets/image_caption/pascal_image_data/test/"
for img in os.listdir(images):
        if os.path.splitext(img)[1] == '.png' or os.path.splitext(img)[1] =='.jpeg' or os.path.splitext(img)[1] =='.jpg':
    
            seq, _ = caption_image_beam_search(encoder, decoder, images+img, word_map, beam_size=5)
            words = [rev_word_map[ind] for ind in seq]
            show_tell_attend_df=show_tell_attend_df.append(
            pd.DataFrame({"img":img,"show_attend_tell":str(words)},index=[0]),ignore_index=True)
            if count %500 ==0:
                print(img)
                print(seq)
                print(words)
            count +=1
            

image_1.jpg
[9488, 1, 2, 3, 1, 4070, 35, 55, 1, 974, 9489]
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
image_1449.jpg
[9488, 1, 570, 86, 17, 1, 68, 32, 1, 716, 9489]
['<start>', 'a', 'close', 'up', 'of', 'a', 'cow', 'in', 'a', 'field', '<end>']
image_19.jpg
[9488, 1, 974, 370, 6, 14, 587, 61, 23, 1, 974, 9489]
['<start>', 'a', 'dog', 'laying', 'on', 'the', 'ground', 'next', 'to', 'a', 'dog', '<end>']
image_2349.jpg
[9488, 1, 893, 123, 3, 1, 896, 28, 1, 2653, 9489]
['<start>', 'a', 'living', 'room', 'with', 'a', 'tv', 'and', 'a', 'fireplace', '<end>']
image_28.jpg
[9488, 1, 207, 799, 71, 61, 23, 1, 1975, 427, 9489]
['<start>', 'a', 'brown', 'horse', 'standing', 'next', 'to', 'a', 'wire', 'fence', '<end>']
image_3249.jpg
[9488, 1, 124, 98, 6, 1, 1603, 32, 1, 123, 9489]
['<start>', 'a', 'cat', 'sitting', 'on', 'a', 'bed', 'in', 'a', 'room', '<end>']
image_37.jpg
[9488, 1, 645, 409, 6, 14, 37, 17, 1, 289, 9489]
['<start>', 'a', 'car', 'parked', 'on',

In [15]:
matfile = '/home/pramod/Downloads/datasets/image_caption/labelme/data_for_image_caption.mat'

In [16]:
mat_data = False
if mat_data:
    #read matfile
    img_list = read_mat_file(matfile)
    print(len(img_list[0]))
    #perform caption generation
    show_tell_attend_df  = pd.DataFrame()
    count = 0

    for i in range(len(img_list[0])):
        seq, _ = caption_image_beam_search(encoder, decoder, img_list[0][i], word_map, beam_size=5,read_mat=False)
        words = [rev_word_map[ind] for ind in seq]
        show_tell_attend_df=show_tell_attend_df.append(
        pd.DataFrame({"img":i,"show_attend_tell":str(words)},index=[0]),ignore_index=True)
        if count %500 ==0:
            print(i)
            print(seq)
            print(words)
        count +=1

In [17]:
show_tell_attend_df.shape

(4952, 2)

In [18]:
show_tell_attend_df.to_csv("pascal_test.csv")

In [28]:
results = pd.read_csv("../show_tell/pascal_test.csv")
print(results.shape)

(4952, 3)


In [20]:
pd.merge(results,show_tell_attend_df,on="img").to_csv("pascal_test_show_attend_tell.csv")

# Calculate BLEU Score

In [21]:
from nltk.translate.bleu_score import sentence_bleu

In [29]:
show_tell_attend_df.head()

Unnamed: 0,img,show_attend_tell
0,image_1.jpg,"['<start>', 'a', 'man', 'with', 'a', 'beard', ..."
1,image_10.jpg,"['<start>', 'a', 'yellow', 'taxi', 'cab', 'dri..."
2,image_100.jpg,"['<start>', 'a', 'close', 'up', 'of', 'a', 'bi..."
3,image_1000.jpg,"['<start>', 'a', 'group', 'of', 'people', 'sta..."
4,image_1001.jpg,"['<start>', 'a', 'green', 'bus', 'parked', 'on..."


In [86]:
#reference - human translation
#candidate - machine translation

reference = show_tell_attend_df.iloc[1,1]
candidate = show_tell_attend_df.iloc[0,1]
score1 = sentence_bleu([reference], candidate)
score2 = sentence_bleu([candidate],reference)
print(score1, score2)

0.6223090127514708 0.6222160789161449


In [78]:
sentences_list = [show_tell_attend_df.iloc[i,1] for i in range(show_tell_attend_df.shape[0])]

In [99]:
for sentence in sentences_list:
    for i in range(2):
        print(sentence)
        print(show_tell_attend_df.iloc[i,1])
        print(sentence_bleu([sentence],show_tell_attend_df.iloc[i,1]))

['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
1.0
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6222160789161449
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6223090127514708
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
1.0
['<start>', 'a', 'close', 'up', 'of', 'a', 'bird', 'in', 'the', 'water', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6552337859449965
['<start>', 'a', 'close', 'up', 'of', 'a', 'bird', 'in', 'the', 'water', '<end>']
['<start>',

0.5810305455512761
['<start>', 'a', 'man', 'is', 'standing', 'in', 'the', 'doorway', 'of', 'a', 'train', 'station', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5164115819411681
['<start>', 'a', 'living', 'room', 'filled', 'with', 'furniture', 'and', 'a', 'lamp', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6252298839413547
['<start>', 'a', 'living', 'room', 'filled', 'with', 'furniture', 'and', 'a', 'lamp', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5539047368833985
['<start>', 'a', 'couple', 'of', 'women', 'laying', 'on', 'a', 'bed', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6367939210783171
['<start>', 'a', 'couple', 'of', 'women', 'laying', 'on', 'a', 'bed', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6076456430442215
['<start>

0.5630941712841285
['<start>', 'two', 'people', 'standing', 'next', 'to', 'a', 'white', 'truck', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5957281781774617
['<start>', 'a', 'yellow', 'and', 'black', 'motorcycle', 'parked', 'in', 'a', 'garage', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5701934530428012
['<start>', 'a', 'yellow', 'and', 'black', 'motorcycle', 'parked', 'in', 'a', 'garage', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5750563552856861
['<start>', 'a', 'woman', 'holding', 'a', 'baby', 'with', 'a', 'teddy', 'bear', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.8080592164783575
['<start>', 'a', 'woman', 'holding', 'a', 'baby', 'with', 'a', 'teddy', 'bear', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5901855319297746
['<s

['<start>', 'a', 'large', 'jetliner', 'sitting', 'on', 'top', 'of', 'an', 'airport', 'tarmac', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5047011275535271
['<start>', 'a', 'large', 'jetliner', 'sitting', 'on', 'top', 'of', 'an', 'airport', 'tarmac', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5026689845591387
['<start>', 'a', 'woman', 'riding', 'a', 'skateboard', 'down', 'a', 'street', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.7085675048726623
['<start>', 'a', 'woman', 'riding', 'a', 'skateboard', 'down', 'a', 'street', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.7289084103194675
['<start>', 'a', 'dog', 'running', 'in', 'the', 'snow', 'with', 'a', 'frisbee', 'in', 'its', 'mouth', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5688436

0.5667190695755381
['<start>', 'a', 'small', 'dog', 'sitting', 'in', 'the', 'back', 'of', 'a', 'car', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6602885174683945
['<start>', 'a', 'small', 'dog', 'sitting', 'in', 'the', 'back', 'of', 'a', 'car', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6167748551534863
['<start>', 'a', 'rear', 'view', 'mirror', 'of', 'a', 'car', 'driving', 'down', 'a', 'road', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.577814099685123
['<start>', 'a', 'rear', 'view', 'mirror', 'of', 'a', 'car', 'driving', 'down', 'a', 'road', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6116062234088823
['<start>', 'a', 'bike', 'parked', 'next', 'to', 'a', 'metal', 'fence', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5884875049513

['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.4978767342213093
['<start>', 'a', 'couple', 'of', 'birds', 'that', 'are', 'standing', 'in', 'the', 'grass', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5563463725600144
['<start>', 'a', 'couple', 'of', 'birds', 'that', 'are', 'standing', 'in', 'the', 'grass', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5417717210571225
['<start>', 'a', 'dog', 'that', 'is', 'standing', 'in', 'the', 'dirt', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6856856518393089
['<start>', 'a', 'dog', 'that', 'is', 'standing', 'in', 'the', 'dirt', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6377558759637982
['<start>', 'a', 'black', 'and', 'white', 'cow', 'eating', 'grass', 'in', 'a', 'field', '<end>']
['<start>', 'a', 'man', '

['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5343605048150033
['<start>', 'a', 'room', 'with', 'a', 'red', 'chair', 'and', 'a', 'red', 'chair', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6475629530855648
['<start>', 'a', 'room', 'with', 'a', 'red', 'chair', 'and', 'a', 'red', 'chair', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5296563748978259
['<start>', 'a', 'close', 'up', 'of', 'a', 'car', 'with', 'a', 'red', 'light', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6647596254622143
['<start>', 'a', 'close', 'up', 'of', 'a', 'car', 'with', 'a', 'red', 'light', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5759034900111919
['<start>', 'a', 'man', 'riding', 'on', 'the', 'back', 'of', 'a', 'brown', 'horse', '<end>']
['<start>', 'a', 'man', 'with',

0.6105461819872672
['<start>', 'a', 'red', 'car', 'parked', 'in', 'a', 'parking', 'lot', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6239497063514475
['<start>', 'a', 'red', 'car', 'parked', 'in', 'a', 'parking', 'lot', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6338507285180492
['<start>', 'a', 'bike', 'with', 'a', 'hat', 'on', 'top', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6163378055050976
['<start>', 'a', 'bike', 'with', 'a', 'hat', 'on', 'top', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5481776183864182
['<start>', 'a', 'couple', 'of', 'people', 'standing', 'next', 'to', 'each', 'other', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.538373982928185
['<start>', 'a', 'couple', 'of', 'people', 'standing', 'next', 'to', 'each', '

0.6076456430442215
['<start>', 'two', 'women', 'sitting', 'at', 'a', 'table', 'with', 'wine', 'glasses', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6038394799506045
['<start>', 'two', 'women', 'sitting', 'at', 'a', 'table', 'with', 'wine', 'glasses', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5595174569363913
['<start>', 'two', 'people', 'sitting', 'on', 'a', 'couch', 'with', 'a', 'dog', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.7033680798039559
['<start>', 'two', 'people', 'sitting', 'on', 'a', 'couch', 'with', 'a', 'dog', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6225071046015808
['<start>', 'a', 'large', 'jetliner', 'sitting', 'on', 'top', 'of', 'an', 'airport', 'runway', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.502511483

0.6495829208675258
['<start>', 'a', 'man', 'riding', 'on', 'the', 'back', 'of', 'a', 'brown', 'horse', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5776774408879932
['<start>', 'a', 'living', 'room', 'filled', 'with', 'furniture', 'and', 'a', 'lamp', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6252298839413547
['<start>', 'a', 'living', 'room', 'filled', 'with', 'furniture', 'and', 'a', 'lamp', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5539047368833985
['<start>', 'a', 'man', 'standing', 'next', 'to', 'a', 'parked', 'motorcycle', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6387668640744919
['<start>', 'a', 'man', 'standing', 'next', 'to', 'a', 'parked', 'motorcycle', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6313226661604213
['<s

0.6146243638052402
['<start>', 'a', 'bouquet', 'of', 'pink', 'flowers', 'in', 'a', 'garden', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6031487253013109
['<start>', 'a', 'close', 'up', 'of', 'a', 'car', 'parked', 'in', 'a', 'parking', 'lot', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5799698503436834
['<start>', 'a', 'close', 'up', 'of', 'a', 'car', 'parked', 'in', 'a', 'parking', 'lot', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5442952521936681
['<start>', 'a', 'small', 'bird', 'sitting', 'on', 'a', 'tree', 'branch', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6570216164370384
['<start>', 'a', 'small', 'bird', 'sitting', 'on', 'a', 'tree', 'branch', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6510035498574047
['<start>', 'a', '

['<start>', 'a', 'cat', 'laying', 'on', 'top', 'of', 'a', 'red', 'suitcase', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6288743594731836
['<start>', 'a', 'cat', 'laying', 'on', 'top', 'of', 'a', 'red', 'suitcase', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.650210160378708
['<start>', 'a', 'cat', 'that', 'is', 'standing', 'on', 'a', 'table', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.660758289370845
['<start>', 'a', 'cat', 'that', 'is', 'standing', 'on', 'a', 'table', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.671226726764611
['<start>', 'a', 'car', 'is', 'parked', 'in', 'a', 'parking', 'lot', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6578409629895765
['<start>', 'a', 'car', 'is', 'parked', 'in', 'a', 'parking', 'lot', '<end>']


0.5757867344220025
['<start>', 'a', 'group', 'of', 'ducks', 'floating', 'on', 'top', 'of', 'a', 'lake', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5577287378255189
['<start>', 'a', 'man', 'and', 'a', 'woman', 'in', 'a', 'wedding', 'dress', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.7575763772334728
['<start>', 'a', 'man', 'and', 'a', 'woman', 'in', 'a', 'wedding', 'dress', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6186065258902375
['<start>', 'a', 'blue', 'motorcycle', 'parked', 'in', 'a', 'parking', 'lot', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6237544567723835
['<start>', 'a', 'blue', 'motorcycle', 'parked', 'in', 'a', 'parking', 'lot', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6064108816631052
['<start>', 'a', 'small',

0.6251305196791851
['<start>', 'a', 'close', 'up', 'of', 'a', 'circular', 'mirror', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5327275655592708
['<start>', 'a', 'close', 'up', 'of', 'a', 'circular', 'mirror', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5306008661611274
['<start>', 'a', 'group', 'of', 'birds', 'sitting', 'on', 'a', 'bird', 'feeder', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6302627251748246
['<start>', 'a', 'group', 'of', 'birds', 'sitting', 'on', 'a', 'bird', 'feeder', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5792736657717191
['<start>', 'a', 'flat', 'screen', 'tv', 'sitting', 'on', 'top', 'of', 'a', 'wooden', 'table', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5079686715433027
['<start>', 'a', 'flat', 'screen

0.570518768951604
['<start>', 'a', 'man', 'riding', 'a', 'bike', 'with', 'a', 'helmet', 'on', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.7922437413046267
['<start>', 'a', 'man', 'riding', 'a', 'bike', 'with', 'a', 'helmet', 'on', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6280047587631943
['<start>', 'a', 'group', 'of', 'animals', 'that', 'are', 'standing', 'in', 'the', 'dirt', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5650648348607568
['<start>', 'a', 'group', 'of', 'animals', 'that', 'are', 'standing', 'in', 'the', 'dirt', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5364855112075629
['<start>', 'a', 'table', 'with', 'wine', 'glasses', 'and', 'wine', 'glasses', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6037161779945602
['<star

['<start>', 'a', 'bus', 'is', 'parked', 'at', 'a', 'bus', 'station', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5937827842586197
['<start>', 'a', 'group', 'of', 'people', 'riding', 'on', 'top', 'of', 'a', 'body', 'of', 'water', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.4974886179879585
['<start>', 'a', 'group', 'of', 'people', 'riding', 'on', 'top', 'of', 'a', 'body', 'of', 'water', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.47071201419075603
['<start>', 'a', 'train', 'yard', 'with', 'a', 'train', 'on', 'the', 'tracks', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6757866493335688
['<start>', 'a', 'train', 'yard', 'with', 'a', 'train', 'on', 'the', 'tracks', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5759034900111919
['<start>',

0.6560527744660274
['<start>', 'a', 'white', 'vase', 'filled', 'with', 'lots', 'of', 'green', 'plants', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5841925915155736
['<start>', 'a', 'white', 'vase', 'filled', 'with', 'lots', 'of', 'green', 'plants', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5017253374798105
['<start>', 'a', 'group', 'of', 'women', 'standing', 'next', 'to', 'each', 'other', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.574319143675947
['<start>', 'a', 'group', 'of', 'women', 'standing', 'next', 'to', 'each', 'other', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.559737608205144
['<start>', 'a', 'blue', 'and', 'white', 'train', 'traveling', 'down', 'train', 'tracks', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5723884773

['<start>', 'a', 'bike', 'parked', 'on', 'a', 'sidewalk', 'next', 'to', 'a', 'building', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6069129834982265
['<start>', 'a', 'bike', 'parked', 'on', 'a', 'sidewalk', 'next', 'to', 'a', 'building', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5508524042228323
['<start>', 'a', 'black', 'car', 'parked', 'in', 'a', 'parking', 'lot', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6341991705871414
['<start>', 'a', 'black', 'car', 'parked', 'in', 'a', 'parking', 'lot', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6310018891040166
['<start>', 'a', 'boat', 'that', 'is', 'sitting', 'in', 'the', 'water', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6416294932514401
['<start>', 'a', 'boat', 'that', 'is', 'sitt

0.5609802284380578
['<start>', 'a', 'living', 'room', 'filled', 'with', 'furniture', 'and', 'a', 'fire', 'place', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5008685699977766
['<start>', 'a', 'close', 'up', 'of', 'a', 'person', 'holding', 'a', 'bird', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.7364259654815225
['<start>', 'a', 'close', 'up', 'of', 'a', 'person', 'holding', 'a', 'bird', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6075254491744929
['<start>', 'a', 'black', 'and', 'white', 'photo', 'of', 'a', 'man', 'with', 'a', 'beard', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6743150662846321
['<start>', 'a', 'black', 'and', 'white', 'photo', 'of', 'a', 'man', 'with', 'a', 'beard', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.4799

0.6545240236911511
['<start>', 'a', 'group', 'of', 'horses', 'running', 'in', 'a', 'field', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5959538727010246
['<start>', 'a', 'train', 'that', 'is', 'sitting', 'on', 'the', 'tracks', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6165140843749526
['<start>', 'a', 'train', 'that', 'is', 'sitting', 'on', 'the', 'tracks', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5989744349047987
['<start>', 'a', 'train', 'traveling', 'down', 'train', 'tracks', 'next', 'to', 'a', 'forest', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5237826224998949
['<start>', 'a', 'train', 'traveling', 'down', 'train', 'tracks', 'next', 'to', 'a', 'forest', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5575548089005745
['<star

0.7673492738474538
['<start>', 'a', 'dog', 'that', 'is', 'laying', 'down', 'on', 'a', 'bed', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6838548712188725
['<start>', 'a', 'black', 'and', 'red', 'train', 'traveling', 'down', 'train', 'tracks', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5681060873531012
['<start>', 'a', 'black', 'and', 'red', 'train', 'traveling', 'down', 'train', 'tracks', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5690412386917354
['<start>', 'a', 'black', 'car', 'parked', 'in', 'a', 'parking', 'lot', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6341991705871414
['<start>', 'a', 'black', 'car', 'parked', 'in', 'a', 'parking', 'lot', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6310018891040166
['<start>', 'a', 'man'

['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6213309390530631
['<start>', 'a', 'wooden', 'boat', 'filled', 'with', 'lots', 'of', 'plants', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5481245956424027
['<start>', 'a', 'night', 'time', 'picture', 'of', 'a', 'city', 'street', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5549106223732326
['<start>', 'a', 'night', 'time', 'picture', 'of', 'a', 'city', 'street', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6592546499606633
['<start>', 'a', 'boat', 'that', 'is', 'sitting', 'in', 'the', 'water', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6416294932514401
['<start>', 'a', 'boat', 'that', 'is', 'sitting', 'in', 'the', 'water', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'st

['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6070582897121548
['<start>', 'a', 'desktop', 'computer', 'sitting', 'on', 'top', 'of', 'a', 'desk', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5810243056309874
['<start>', 'a', 'desktop', 'computer', 'sitting', 'on', 'top', 'of', 'a', 'desk', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5985992994123512
['<start>', 'a', 'living', 'room', 'filled', 'with', 'furniture', 'and', 'a', 'fire', 'place', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5609802284380578
['<start>', 'a', 'living', 'room', 'filled', 'with', 'furniture', 'and', 'a', 'fire', 'place', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5008685699977766
['<start>', 'a', 'car', 'is', 'parked', 'in', 'a', 'snowy', 'driveway', '<end>']
['<start>

0.5966243883140556
['<start>', 'a', 'group', 'of', 'people', 'walking', 'down', 'a', 'city', 'street', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6870831723422437
['<start>', 'a', 'black', 'and', 'white', 'cat', 'wearing', 'a', 'yellow', 'hat', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6357146267512906
['<start>', 'a', 'black', 'and', 'white', 'cat', 'wearing', 'a', 'yellow', 'hat', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6666974476507348
['<start>', 'a', 'couple', 'of', 'people', 'on', 'a', 'boat', 'in', 'the', 'water', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5672918933975011
['<start>', 'a', 'couple', 'of', 'people', 'on', 'a', 'boat', 'in', 'the', 'water', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5495025465698461
['

0.6338507285180492
['<start>', 'a', 'large', 'body', 'of', 'water', 'with', 'a', 'city', 'in', 'the', 'background', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.547240186425169
['<start>', 'a', 'large', 'body', 'of', 'water', 'with', 'a', 'city', 'in', 'the', 'background', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.47328211921179375
['<start>', 'a', 'sailboat', 'in', 'the', 'ocean', 'on', 'a', 'clear', 'day', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6248539928136874
['<start>', 'a', 'sailboat', 'in', 'the', 'ocean', 'on', 'a', 'clear', 'day', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6145609458765993
['<start>', 'a', 'man', 'wearing', 'a', 'pink', 'hat', 'and', 'a', 'pink', 'hat', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6859

['<start>', 'a', 'cat', 'sleeping', 'on', 'a', 'bed', 'with', 'a', 'cat', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6478092542056391
['<start>', 'a', 'living', 'room', 'filled', 'with', 'furniture', 'and', 'a', 'window', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6128303482404855
['<start>', 'a', 'living', 'room', 'filled', 'with', 'furniture', 'and', 'a', 'window', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5643405900302997
['<start>', 'a', 'man', 'and', 'a', 'woman', 'sitting', 'on', 'a', 'bench', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.7532521331515082
['<start>', 'a', 'man', 'and', 'a', 'woman', 'sitting', 'on', 'a', 'bench', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6076456430442215
['<start>', 'a', 'man', 'riding', '

['<start>', 'a', 'man', 'sitting', 'at', 'a', 'table', 'in', 'a', 'restaurant', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.675725148115957
['<start>', 'a', 'man', 'sitting', 'at', 'a', 'table', 'in', 'a', 'restaurant', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6188731541034878
['<start>', 'a', 'blue', 'bench', 'sitting', 'next', 'to', 'a', 'blue', 'bench', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6026943064641965
['<start>', 'a', 'blue', 'bench', 'sitting', 'next', 'to', 'a', 'blue', 'bench', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5842420032449925
['<start>', 'a', 'motorcycle', 'parked', 'in', 'front', 'of', 'a', 'brick', 'building', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5967920300989707
['<start>', 'a', 'motorcycle'

['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6545596346776343
['<start>', 'a', 'bus', 'is', 'parked', 'on', 'the', 'side', 'of', 'the', 'road', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5851218426162196
['<start>', 'a', 'bus', 'is', 'parked', 'on', 'the', 'side', 'of', 'the', 'road', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5158130102792341
['<start>', 'a', 'small', 'white', 'dog', 'sitting', 'in', 'the', 'grass', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6695581841610662
['<start>', 'a', 'small', 'white', 'dog', 'sitting', 'in', 'the', 'grass', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6135764071143371
['<start>', 'a', 'group', 'of', 'people', 'posing', 'for', 'a', 'picture', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'i

['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5817269541634064
['<start>', 'a', 'group', 'of', 'people', 'standing', 'around', 'a', 'tennis', 'court', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5653280664047247
['<start>', 'a', 'close', 'up', 'of', 'a', 'dog', 'in', 'a', 'field', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.7007831695295703
['<start>', 'a', 'close', 'up', 'of', 'a', 'dog', 'in', 'a', 'field', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6118400619467311
['<start>', 'a', 'group', 'of', 'young', 'girls', 'sitting', 'next', 'to', 'each', 'other', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5111952126471316
['<start>', 'a', 'group', 'of', 'young', 'girls', 'sitting', 'next', 'to', 'each', 'other', '<end>']
['<start>', 'a', 'yellow',

0.6738776287480711
['<start>', 'a', 'white', 'dog', 'standing', 'on', 'top', 'of', 'a', 'grass', 'covered', 'field', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.5643813998229847
['<start>', 'a', 'white', 'dog', 'standing', 'on', 'top', 'of', 'a', 'grass', 'covered', 'field', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.527104040250041
['<start>', 'a', 'group', 'of', 'people', 'sitting', 'around', 'a', 'table', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6061503912221605
['<start>', 'a', 'group', 'of', 'people', 'sitting', 'around', 'a', 'table', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6239836538671304
['<start>', 'a', 'man', 'in', 'a', 'yellow', 'shirt', 'is', 'playing', 'a', 'video', 'game', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end

0.5863218924209662
['<start>', 'a', 'small', 'airplane', 'flying', 'through', 'a', 'cloudy', 'sky', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.5917143103673882
['<start>', 'a', 'man', 'standing', 'next', 'to', 'a', 'small', 'airplane', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6570190859021157
['<start>', 'a', 'man', 'standing', 'next', 'to', 'a', 'small', 'airplane', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6484726142483397
['<start>', 'a', 'close', 'up', 'of', 'a', 'dog', 'on', 'a', 'leash', '<end>']
['<start>', 'a', 'man', 'with', 'a', 'beard', 'is', 'holding', 'a', 'dog', '<end>']
0.6854863825210183
['<start>', 'a', 'close', 'up', 'of', 'a', 'dog', 'on', 'a', 'leash', '<end>']
['<start>', 'a', 'yellow', 'taxi', 'cab', 'driving', 'down', 'a', 'street', '<end>']
0.6017590538690863
['<start>', 'a', 'group', 'of', 'sheep', 'sta