In [1]:
import re
import cv2
import pickle
import pyttsx3
import numpy as np
from nltk.translate.bleu_score import corpus_bleu
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

model = load_model("model_weights/model_19.h5")
model_temp = ResNet50(weights="imagenet", input_shape=(224,224,3))
model_resnet = Model(model_temp.input, model_temp.layers[-2].output)

def preprocess_image(img):
    img = load_img(img, target_size=(224,224))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img)
    return img

def encode_image(img):
    img = preprocess_image(img)
    feature_vector = model_resnet.predict(img)
    feature_vector = feature_vector.reshape(1, feature_vector.shape[1])
    return feature_vector

with open("words_to_index.pkl", "rb") as w2i:
    words_to_index = pickle.load(w2i)
    
with open("index_to_words.pkl", "rb") as i2w:
    index_to_words = pickle.load(i2w)
 
def predict_caption(photo):

    input_text = "<s>"
    max_length = 35

    for i in range(max_length):
        sequence = []
        for w in input_text.split():
            if w in words_to_index:
                sequence.append(words_to_index[w])
        sequence = pad_sequences([sequence], maxlen = max_length, padding = "post")

        y_pred = model.predict([photo, sequence])
        y_pred = y_pred.argmax()
        word = index_to_words[y_pred]
        input_text += ' ' + word
        
        if word == "<e>":
            break
        
    final_caption =  input_text.split()
    final_caption = final_caption[1:-1]
    final_caption = ' '.join(final_caption)
    return final_caption

def caption_image(image):

    encoded_img = encode_image(image)
    caption = predict_caption(encoded_img)
    return caption


In [5]:
def Read_File(path):
    with open(path) as f:
        line = f.read()
    return line

path = "Data/Flickr_Text_Data/Flickr_8k.testImages.txt"
image = Read_File(path)
image = image.split("\n")[:-1]
img_name = []
for img in image:
    image = img.split(".")[0]
    img_name.append(image)

path = "Data/Flickr_Text_Data/Flickr8k.token.txt"
captions = Read_File(path)
captions = captions.split("\n")[:-1]

captions = [i.lower() for i in captions]
descriptions = {}

for x in captions:
    first, second = x.split("\t")   
    second = re.sub("[^a-z]+", " ", second)
    image_name = first.split(".")[0]
    if image_name in img_name:
        if descriptions.get(image_name) is None:
            descriptions[image_name] = []       
        descriptions[image_name].append("<s> " + second + " <e>")


In [6]:
actual, predicted = [], []
for key, description_list in descriptions.items():
    y = caption_image("Data/Images/" + key + ".jpg")
    y = "<s> " + y + " <e>"
    references = [d.split() for d in description_list]
    actual.append(references)
    predicted.append(y.split())


In [7]:
print('BLEU1 SCORE FOR - 1 gram: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
print('BLEU2 SCORE FOR - 2 gram: %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))
print('BLEU3 SCORE FOR - 3 gram: %f' % corpus_bleu(actual, predicted, weights=(0.33, 0.33, 0.33, 0)))
print('BLEU4 SCORE FOR - 4 gram: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))

BLEU1 SCORE FOR - 1 gram: 0.527321
BLEU2 SCORE FOR - 2 gram: 0.273850
BLEU3 SCORE FOR - 3 gram: 0.155686
BLEU4 SCORE FOR - 4 gram: 0.085186
