In [1]:
import pickle
import numpy as np
from tqdm.notebook import tqdm # ui for data processing
import matplotlib.pyplot as plt
from keras.layers import TextVectorization
from keras.models import Model, load_model
from keras.utils import load_img, img_to_array, pad_sequences
from textwrap import wrap
from contextlib import redirect_stdout
from nltk.translate import bleu_score, meteor
from nltk import word_tokenize

from google.colab import drive
import warnings
warnings.filterwarnings("ignore")

import nltk
nltk.download("punkt")
nltk.download("wordnet")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [2]:
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
GDRIVE_WORKING_PATH = "/content/gdrive/MyDrive/image_cap"
MODEL_NAMES = ["basic", "plus_dropout", "plus_reg", "plus_dropout_places", "dropout_aug"]

In [4]:
with open(f"{GDRIVE_WORKING_PATH}/list_id_test.pkl", "rb") as f:
  test_ids = pickle.load(f)

In [5]:
def get_vgg_imagenet():
  with open(f"{GDRIVE_WORKING_PATH}/dict_features_test_vgg16.pkl", "rb") as f:
    img_features_test = pickle.load(f)
  return img_features_test

In [6]:
def get_vgg_places():
  with open(f"{GDRIVE_WORKING_PATH}/dict_features_test_vgg16_places.pkl", "rb") as f:
    img_features_test = pickle.load(f)
  return img_features_test

In [7]:
with open(f"{GDRIVE_WORKING_PATH}/dict_clean_captions_test.pkl", "rb") as f:
  clean_captions_test = pickle.load(f)

In [8]:
def get_vectorizer():
  from_disk = pickle.load(open(f"{GDRIVE_WORKING_PATH}/vectorizer.pkl", "rb"))
  new_v = TextVectorization.from_config(from_disk.get("config"))
  new_v.set_weights(from_disk.get("weights"))
  return new_v

In [9]:
def get_vectorizer_aug():
  from_disk = pickle.load(open(f"{GDRIVE_WORKING_PATH}/vectorizer_augmented.pkl", "rb"))
  new_v = TextVectorization.from_config(from_disk.get("config"))
  new_v.set_weights(from_disk.get("weights"))
  return new_v

In [10]:
def readImage(path, img_size=224):
  img = load_img(path, color_mode='rgb', target_size=(img_size, img_size))
  img = img_to_array(img)
  img = img/255

  return img

In [11]:
def display_images(indexes):
  plt.figure(figsize=(20, 20))
  n = 0
  for ix in indexes:
    n += 1
    plt.subplot(5 , 5, n)
    plt.subplots_adjust(hspace = 0.7, wspace = 0.3)
    image = readImage(f"{GDRIVE_WORKING_PATH}/iaprtc12/images/{ix}.jpg")
    plt.imshow(image)
    plt.title("\n".join(wrap(predicted_captions[n-1], 40)), fontsize=8)
    plt.axis("off")

# Metrics

## BLEU

In [12]:
def get_bleu_score(ref, hyp):
  r = ref[8:-6].split(" ")
  h = hyp[6:-4].split(" ")

  w = [
    (1, 0, 0, 0),
    (0.5, 0.5, 0, 0),
    (0.33, 0.33, 0.33, 0),
    (0.25, 0.25, 0.25, 0.25)
  ]

  scores = bleu_score.sentence_bleu([r], h, weights=w)

  return scores

## Meteor

In [13]:
def get_meteor(ref, hyp):
  r = ref[8:-6]
  h = hyp[6:-4]

  score = meteor([word_tokenize(r)], word_tokenize(h))

  return score

# Predict

In [14]:
def evaluate_test(model, vectorizer, cap_len, img_features, mod_name):
  bleu_dict = {}
  meteor_dict = {}
  for ix in tqdm(test_ids):
    feature = img_features[ix].reshape(1,4096)
    in_text = "start"
    for i in range(1, cap_len):
      sequence = vectorizer(in_text)
      sequence = pad_sequences([sequence], cap_len, padding="post")
      y_pred = model.predict([feature, sequence], verbose=0)
      y_hat = np.argmax(y_pred)

      word = vectorizer.get_vocabulary()[y_hat]

      if word is None:
        break

      in_text += " " + word

      if word == "end":
        break

    ground_truth = clean_captions_test[ix]
    predicted = in_text

    # calculate score
    bleu_dict[ix] = get_bleu_score(ground_truth, predicted)
    meteor_dict[ix] = get_meteor(ground_truth, predicted)

  pickle.dump(bleu_dict, open(f"{GDRIVE_WORKING_PATH}/models/{mod_name}/bleu_score.pkl", "wb"))
  pickle.dump(meteor_dict, open(f"{GDRIVE_WORKING_PATH}/models/{mod_name}/meteor.pkl", "wb"))

## Basic Model

In [15]:
if False:
  model_name = "basic"
  model_file_name = f"{GDRIVE_WORKING_PATH}/models/{model_name}/saved_model"
  cap_len = 73
  img_f = get_vgg_imagenet()
  m = load_model(model_file_name)
  vec = get_vectorizer()
  evaluate_test(m, vec, cap_len, img_f, model_name)

## Plus Dropout

In [16]:
if False:
  model_name = "plus_dropout"
  model_file_name = f"{GDRIVE_WORKING_PATH}/models/{model_name}/saved_model"
  cap_len = 73
  img_f = get_vgg_imagenet()
  m = load_model(model_file_name)
  vec = get_vectorizer()
  evaluate_test(m, vec, cap_len, img_f, model_name)

## Plus L1 Reg

In [17]:
if False:
  model_name = "plus_reg"
  model_file_name = f"{GDRIVE_WORKING_PATH}/models/{model_name}/saved_model"
  cap_len = 73
  img_f = get_vgg_imagenet()
  m = load_model(model_file_name)
  vec = get_vectorizer()
  evaluate_test(m, vec, cap_len, img_f, model_name)

## Basic + Drop on Places365

In [18]:
if False:
  model_name = "plus_dropout_places"
  model_file_name = f"{GDRIVE_WORKING_PATH}/models/{model_name}/saved_model"
  cap_len = 73
  img_f = get_vgg_places()
  m = load_model(model_file_name)
  vec = get_vectorizer()
  evaluate_test(m, vec, cap_len, img_f, model_name)

## Basic + Drop + Augmented

In [19]:
if True:
  model_name = "dropout_aug"
  model_file_name = f"{GDRIVE_WORKING_PATH}/models/{model_name}/saved_model"
  cap_len = 76
  img_f = get_vgg_imagenet()
  m = load_model(model_file_name)
  vec = get_vectorizer_aug()
  evaluate_test(m, vec, cap_len, img_f, model_name)

  0%|          | 0/2000 [00:00<?, ?it/s]

# Results

In [20]:
for name in MODEL_NAMES:
  file_name = f"/content/gdrive/MyDrive/eval/{name}_bleu_score.pkl"
  with open(file_name, "rb") as f:
    bleu_dict = pickle.load(f)

  file_name = f"/content/gdrive/MyDrive/eval/{name}_meteor.pkl"
  with open(file_name, "rb") as f:
    meteor_dict = pickle.load(f)

  meteor_mean = np.mean([el for el in list(meteor_dict.values())])

  bleu1_mean = np.mean([el[0] for el in list(bleu_dict.values())])
  bleu2_mean = np.mean([el[1] for el in list(bleu_dict.values())])
  bleu3_mean = np.mean([el[2] for el in list(bleu_dict.values())])
  bleu4_mean = np.mean([el[3] for el in list(bleu_dict.values())])

  print(f"MODEL -------------------------- {name}")
  print(f"METEOR ---> {round(meteor_mean, 3)}")
  print(f"BLEU1 ---> {round(bleu1_mean, 3)}")
  print(f"BLEU2 ---> {round(bleu2_mean, 3)}")
  print(f"BLEU3 ---> {round(bleu3_mean, 3)}")
  print(f"BLEU4 ---> {round(bleu4_mean, 3)}")
  print(f"---------------------------------------")

MODEL -------------------------- basic
METEOR ---> 0.279
BLEU1 ---> 0.272
BLEU2 ---> 0.17
BLEU3 ---> 0.11
BLEU4 ---> 0.047
---------------------------------------
MODEL -------------------------- plus_dropout
METEOR ---> 0.296
BLEU1 ---> 0.291
BLEU2 ---> 0.185
BLEU3 ---> 0.124
BLEU4 ---> 0.064
---------------------------------------
MODEL -------------------------- plus_reg
METEOR ---> 0.17
BLEU1 ---> 0.173
BLEU2 ---> 0.078
BLEU3 ---> 0.046
BLEU4 ---> 0.0
---------------------------------------
MODEL -------------------------- plus_dropout_places
METEOR ---> 0.268
BLEU1 ---> 0.257
BLEU2 ---> 0.162
BLEU3 ---> 0.108
BLEU4 ---> 0.053
---------------------------------------
MODEL -------------------------- dropout_aug
METEOR ---> 0.254
BLEU1 ---> 0.256
BLEU2 ---> 0.151
BLEU3 ---> 0.096
BLEU4 ---> 0.044
---------------------------------------


# Tests

In [None]:
def compare_results(img_ids):

  feat_imagenet = get_vgg_imagenet()
  feat_places = get_vgg_places()
  vec_aug = get_vectorizer_aug()
  vec = get_vectorizer()

  for img_id in img_ids:
    print(f"ID image --> {img_id}")
    print(f"Ground truth --> {clean_captions_test[img_id]}")
    print("------------------------------------------------------------------")

    for m_name in MODEL_NAMES:

      if m_name == "plus_dropout_places":
        img_features = feat_places
      else:
        img_features = feat_imagenet

      if m_name == "dropout_aug":
        caption_length = 76
        vectorizer = vec_aug
      else:
        caption_length = 73
        vectorizer = vec

      # load model
      model_file_name = f"{GDRIVE_WORKING_PATH}/models/{m_name}/saved_model"
      model = load_model(model_file_name)

      feature = img_features[img_id].reshape(1,4096)
      in_text = "start"
      for i in range(1, caption_length):
        sequence = vectorizer(in_text)
        sequence = pad_sequences([sequence], caption_length, padding="post")
        y_pred = model.predict([feature, sequence], verbose=0)
        y_hat = np.argmax(y_pred)

        word = vectorizer.get_vocabulary()[y_hat]

        if word is None:
          break

        in_text += " " + word

        if word == "end":
          break

      print(f"MODEL {str.upper(m_name)}")
      print(in_text)
      print(get_bleu_score(clean_captions_test[img_id], in_text))
      print(get_meteor(clean_captions_test[img_id], in_text))

      print("------------------------------------------------------------------")


    print(" ")
    print(" ")
    print(" ")
    print(" ")

In [None]:
compare_results(["37794"])

ID image --> 37794
Ground truth --> [start] cyclist with red blue and white jersey black cycling shorts and blue helmet is riding on black and red racing bike on grey road green and brown grass dark green trees and blue sky in the background [end]
------------------------------------------------------------------
MODEL BASIC
start cyclist with red red and red and red jersey black cycling shorts and red racing bike on grey road with red racing bike on grey road with red and blue sky in the background end
[0.6193395548601198, 0.4716446149000649, 0.37194033050023495, 0.2658035116229967]
0.5701209932884523
------------------------------------------------------------------
MODEL PLUS_DROPOUT
start cyclist with red blue and white jersey black cycling shorts and blue helmet is riding on black and red racing bike on grey road in flat landscape with green meadows and trees in the background end
[0.8095364031384598, 0.6944552477903455, 0.658259398511576, 0.6198013086528833]
0.7642663043478263
--