In [1]:
from keras.preprocessing import image
from keras.models import load_model
import numpy as np

from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import corpus_bleu

from caption_utils import *

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def generate_seq(img_input):
    
    if img_input.shape != (1, 512):
        img_input = img_input.reshape(1, 512)

    
    assert(img_input.shape == (1, 512))
    stop_condition = False
    decoded_sentence = []
    target_seq = np.array([token2idx['<bos>']]).reshape(1, 1)
    states_value = encoder_model.predict(img_input)

    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        sampled_char = idx2token[sampled_token_index]
        decoded_sentence += [sampled_char]
        if (sampled_char == '<eos>' or len(decoded_sentence) > 30):
            stop_condition = True
        target_seq = np.array([sampled_token_index]).reshape(1, 1)
        states_value = [h, c]

    return ' '.join(decoded_sentence[:-1])

In [3]:
def get_captions(model, img_path):   
    #img_path = 'data/Arnav_Hankyu_Pulkit2.jpg'
    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    features = model.predict(x)
    return generate_seq(features)

In [4]:
train_fns_list, dev_fns_list, test_fns_list = load_split_lists()
train_captions_raw, dev_captions_raw, test_captions_raw = get_caption_split()
vocab = create_vocab(train_captions_raw)
token2idx, idx2token = vocab_to_index(vocab)

## VGG16

In [5]:
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input

encoder_model = load_model('saved_models/encoder_model.h5')
decoder_model = load_model('saved_models/decoder_model.h5')
VGG16_model = VGG16(weights='imagenet', include_top=False, pooling='avg')

  ' Found: ' + str(self.outputs))


In [6]:
all_refs = []
all_candidates = []

for i, filename in enumerate(test_fns_list):
    if i%100 == 0:
        print(i, "images processed")
    candidate = get_captions(VGG16_model, "data/Flicker8k_Dataset/"+filename).split()
    references = []    
    for j, caption in enumerate(test_captions_raw[filename]):
        references.append(caption[:-1].split())
    all_refs.append(references)
    all_candidates.append(candidate)

0 images processed
100 images processed
200 images processed
300 images processed
400 images processed
500 images processed
600 images processed
700 images processed
800 images processed
900 images processed


In [11]:
bleu1 = corpus_bleu(all_refs, all_candidates, weights=(1, 0, 0, 0))
bleu2 = corpus_bleu(all_refs, all_candidates, weights=(0, 1, 0, 0))
bleu3 = corpus_bleu(all_refs, all_candidates, weights=(0, 0, 1, 0))
bleu4 = corpus_bleu(all_refs, all_candidates, weights=(0, 0, 0, 1))

In [18]:
print("VGG16")
print("Bleu1 Score: {:.2f}".format(bleu1*100))
print("Bleu2 Score: {:.2f}".format(bleu2*100))
print("Bleu3 Score: {:.2f}".format(bleu3*100))
print("Bleu4 Score: {:.2f}".format(bleu4*100))

VGG16
Bleu1 Score: 51.26
Bleu2 Score: 21.41
Bleu3 Score: 8.32
Bleu4 Score: 3.31


## VGG19

In [19]:
from keras.applications.vgg19 import VGG19
from keras.applications.vgg19 import preprocess_input

encoder_model = load_model('saved_models/encoder_model_VGG19.h5')
decoder_model = load_model('saved_models/decoder_model_VGG19.h5')
VGG19_model = VGG19(weights='imagenet', include_top=False, pooling='avg')

  ' Found: ' + str(self.outputs))


In [20]:
all_refs = []
all_candidates = []

for i, filename in enumerate(test_fns_list):
    if i%100 == 0:
        print(i, "images processed")
    candidate = get_captions(VGG19_model, "data/Flicker8k_Dataset/"+filename).split()
    references = []    
    for j, caption in enumerate(test_captions_raw[filename]):
        references.append(caption[:-1].split())
    all_refs.append(references)
    all_candidates.append(candidate)

0 images processed
100 images processed
200 images processed
300 images processed
400 images processed
500 images processed
600 images processed
700 images processed
800 images processed
900 images processed


In [21]:
bleu1 = corpus_bleu(all_refs, all_candidates, weights=(1, 0, 0, 0))
bleu2 = corpus_bleu(all_refs, all_candidates, weights=(0, 1, 0, 0))
bleu3 = corpus_bleu(all_refs, all_candidates, weights=(0, 0, 1, 0))
bleu4 = corpus_bleu(all_refs, all_candidates, weights=(0, 0, 0, 1))

In [22]:
print("VGG19")
print("Bleu1 Score: {:.2f}".format(bleu1*100))
print("Bleu2 Score: {:.2f}".format(bleu2*100))
print("Bleu3 Score: {:.2f}".format(bleu3*100))
print("Bleu4 Score: {:.2f}".format(bleu4*100))

VGG19
Bleu1 Score: 52.64
Bleu2 Score: 21.95
Bleu3 Score: 8.24
Bleu4 Score: 3.26


## ResNet50

In [23]:
def generate_seq(img_input):
    
    if img_input.shape != (1, 2048):
        img_input = img_input.reshape(1, 2048)

    
    assert(img_input.shape == (1, 2048))
    stop_condition = False
    decoded_sentence = []
    target_seq = np.array([token2idx['<bos>']]).reshape(1, 1)
    states_value = encoder_model.predict(img_input)

    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        sampled_char = idx2token[sampled_token_index]
        decoded_sentence += [sampled_char]
        if (sampled_char == '<eos>' or len(decoded_sentence) > 30):
            stop_condition = True
        target_seq = np.array([sampled_token_index]).reshape(1, 1)
        states_value = [h, c]

    return ' '.join(decoded_sentence[:-1])

In [24]:
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input

encoder_model = load_model('saved_models/encoder_model_ResNet50.h5')
decoder_model = load_model('saved_models/decoder_model_ResNet50.h5')
ResNet50_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

  ' Found: ' + str(self.outputs))


In [25]:
all_refs = []
all_candidates = []

for i, filename in enumerate(test_fns_list):
    if i%100 == 0:
        print(i, "images processed")
    candidate = get_captions(ResNet50_model, "data/Flicker8k_Dataset/"+filename).split()
    references = []    
    for j, caption in enumerate(test_captions_raw[filename]):
        references.append(caption[:-1].split())
    all_refs.append(references)
    all_candidates.append(candidate)

0 images processed
100 images processed
200 images processed
300 images processed
400 images processed
500 images processed
600 images processed
700 images processed
800 images processed
900 images processed


In [26]:
bleu1 = corpus_bleu(all_refs, all_candidates, weights=(1, 0, 0, 0))
bleu2 = corpus_bleu(all_refs, all_candidates, weights=(0, 1, 0, 0))
bleu3 = corpus_bleu(all_refs, all_candidates, weights=(0, 0, 1, 0))
bleu4 = corpus_bleu(all_refs, all_candidates, weights=(0, 0, 0, 1))

In [27]:
print("ResNet50")
print("Bleu1 Score: {:.2f}".format(bleu1*100))
print("Bleu2 Score: {:.2f}".format(bleu2*100))
print("Bleu3 Score: {:.2f}".format(bleu3*100))
print("Bleu4 Score: {:.2f}".format(bleu4*100))

ResNet50
Bleu1 Score: 51.60
Bleu2 Score: 22.71
Bleu3 Score: 8.99
Bleu4 Score: 3.94


## Xception

In [28]:
from keras.applications.xception import Xception
from keras.applications.xception import preprocess_input

encoder_model = load_model('saved_models/encoder_model_ResNet50.h5')
decoder_model = load_model('saved_models/decoder_model_ResNet50.h5')
Xception_model = Xception(weights='imagenet', include_top=False, pooling='avg')

  ' Found: ' + str(self.outputs))


In [29]:
all_refs = []
all_candidates = []

for i, filename in enumerate(test_fns_list):
    if i%100 == 0:
        print(i, "images processed")
    candidate = get_captions(Xception_model, "data/Flicker8k_Dataset/"+filename).split()
    references = []    
    for j, caption in enumerate(test_captions_raw[filename]):
        references.append(caption[:-1].split())
    all_refs.append(references)
    all_candidates.append(candidate)

0 images processed
100 images processed
200 images processed
300 images processed
400 images processed
500 images processed
600 images processed
700 images processed
800 images processed
900 images processed


In [30]:
bleu1 = corpus_bleu(all_refs, all_candidates, weights=(1, 0, 0, 0))
bleu2 = corpus_bleu(all_refs, all_candidates, weights=(0, 1, 0, 0))
bleu3 = corpus_bleu(all_refs, all_candidates, weights=(0, 0, 1, 0))
bleu4 = corpus_bleu(all_refs, all_candidates, weights=(0, 0, 0, 1))

In [31]:
print("Xception")
print("Bleu1 Score: {:.2f}".format(bleu1*100))
print("Bleu2 Score: {:.2f}".format(bleu2*100))
print("Bleu3 Score: {:.2f}".format(bleu3*100))
print("Bleu4 Score: {:.2f}".format(bleu4*100))

Xception
Bleu1 Score: 33.24
Bleu2 Score: 5.54
Bleu3 Score: 1.10
Bleu4 Score: 0.50
