Import libraries. We will use Google Colab Pro + to execute this notebook since it takes about 5 hours to execute each model
Acknowledgements:

AnalyticsVidhya.com


In [None]:
import numpy as np
import pandas as pd
from numpy import array
import string
from PIL import Image
import pickle

import matplotlib.pyplot as plt
import sys, time, os, warnings
warnings.filterwarnings("ignore")
import re


import keras
import tensorflow as tf
from tqdm import tqdm
from nltk.translate.bleu_score import sentence_bleu

from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from keras.layers import Dense, BatchNormalization
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import Dropout
from keras.layers.merge import add
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import load_img, img_to_array
from keras.preprocessing.text import Tokenizer

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from nltk.translate.bleu_score import corpus_bleu

Mount Google Drive so we can read images and captions that are stored on the drive

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
import glob

Mounted at /content/drive


Check that the drive is properly mounted and we caqn read the contents

In [None]:
!ls "/content/drive/My Drive/w266"

In [None]:
image_path = "/content/drive/My Drive/w266/Flicker8kDataset"

Checking the total number of images in the dataset

In [None]:
image_path = "/content/drive/My Drive/w266/Flicker8kDataset"
files = glob.glob(image_path+ "/*.jpg")
i=0
for file in files:
    i=i+1

print("Total Images in Dataset", i)

Total Images in Dataset 8091


In [None]:
'''Array of captions; each entry in the file contains the following
    1. image file name
    2. '#' delimiter
    3. a number for the caption
    4. '\t' separator
    5. caption for the image'''
def create_captions_df(descriptions_file):
    with open(descriptions_file, 'r') as f:
        caption = f.read()
        f.close()

    captions = []
    for line in caption.split('\n'):
        token = line.split('\t')
        if len(token) == 1:
            continue
        ind = token[0].split("#")
        captions.append(ind + [token[1].lower()])

    captions_df = pd.DataFrame(captions,columns=["filename","index","caption"])
    captions_df = captions_df.reindex(columns =['index','filename','caption'])
    captions_df = captions_df[captions_df.filename != '2258277193_586949ec62.jpg.1']
    return captions_df



In [None]:
descriptions_file = "/content/drive/My Drive/w266/Flicker8kText/Flickr8k.token.txt"
captions_df = create_captions_df(descriptions_file)
unique_images = np.unique(captions_df.filename.values)
print("Number of Unique images = ",len(unique_images))
captions_df.head()

In [None]:
''' Generate top 5 images with captions '''
def show_top_5_images(unique_images, image_path):
    target_size = (224,224,3)
    count = 1

    fig = plt.figure(figsize=(10,20))

    for image_file in unique_images[100:104]:
        filename = image_path + '/' + image_file
        print(filename)
        captions = list(captions_df["caption"].loc[captions_df["filename"]==image_file].values)
        image_load = load_img(filename, target_size=target_size)
        ax = fig.add_subplot(5,2,count,xticks=[],yticks=[])
        ax.imshow(image_load)
        count += 1

        ax = fig.add_subplot(5,2,count)
        plt.axis('off')
        ax.plot()
        ax.set_xlim(0,1)
        ax.set_ylim(0,len(captions))
        for i, caption in enumerate(captions):
            ax.text(0,i,caption,fontsize=20)
        count += 1
    plt.show()

In [None]:
show_top_5_images(unique_images, image_path)

Function to create vocabulary

In [None]:
def create_vocabulary(captions_df):
    vocab = []
    for caption in captions_df.caption.values:
        vocab.extend(caption.split())
    return vocab

Create vocabulary and check size

In [None]:
vocabulary = create_vocabulary(captions_df)
print('Vocabulary Size: %d' % len(set(vocabulary)))

Vocabulary Size: 8918


Various housekeeping methods(removing punctuations, numeric tokens, single letters) to prepare text

In [None]:
def check_numeric(caption):
    caption_new = ""
    for word in caption.split():
        isalpha = word.isalpha()
        if isalpha:
            caption_new += " " + word
    return(caption_new)

def check_single_character(text):
    text_len_more_than1 = ""
    for word in text.split():
        if len(word) > 1:
           text_len_more_than1 += " " + word
    return(text_len_more_than1)

def check_punctuation(caption):
    caption_new = caption.translate(string.punctuation)
    return(caption_new)

def clean_captions(caption):
    caption_new = check_punctuation(caption)
    caption_new = check_single_character(caption_new)
    caption_new = check_numeric(caption_new)
    return(caption_new)

In [None]:
for i, caption in enumerate(captions_df.caption.values):
    newcaption = clean_captions(caption)
    captions_df["caption"].iloc[i] = newcaption

In [None]:
cleansed_vocabulary = []
for txt in captions_df.caption.values:
    cleansed_vocabulary.extend(txt.split())
print('Clean Vocabulary Size: %d' % len(set(cleansed_vocabulary)))

Clean Vocabulary Size: 8357


Append <start> and <end> tokens to captions and store in new list

Check a few captions from this list to confirm

In [None]:
PATH = image_path +"/"
all_captions = []
for caption  in captions_df["caption"].astype(str):
    caption = '<start> ' + caption+ ' <end>'
    all_captions.append(caption)

all_captions[:10]

In [None]:
all_img_name_vector = []
for annot in captions_df["filename"]:
    full_image_path = PATH + annot
    all_img_name_vector.append(full_image_path)

all_img_name_vector[:10]

Check image pathnames and captions

In [None]:
print(f"len(all_img_name_vector) : {len(all_img_name_vector)}")
print(f"len(all_captions) : {len(all_captions)}")

len(all_img_name_vector) : 40455
len(all_captions) : 40455


We are taking 625 batches with batch size 64.So need to restrict to 625 * 64 = 40000 

In [None]:
def data_limiter(num,total_captions,all_img_name_vector):
    train_captions, img_name_vector = shuffle(total_captions,all_img_name_vector,random_state=1)
    train_captions = train_captions[:num]
    img_name_vector = img_name_vector[:num]
    return train_captions,img_name_vector

train_captions,img_name_vector = data_limiter(40000,all_captions,all_img_name_vector)

Extract an image vector of the images using InceptionV3

In [None]:
def load_image(image_path):
    img = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, (299, 299))
    img = tf.keras.applications.inception_v3.preprocess_input(img)
    return img, image_path

image_model = tf.keras.applications.InceptionV3(include_top=False, weights='imagenet')
new_input = image_model.input
hidden_layer = image_model.layers[-1].output
image_features_extract_model = tf.keras.Model(new_input, hidden_layer)

Pre process each image using InceptionV3

In [None]:
encode_train = sorted(set(img_name_vector))
image_dataset = tf.data.Dataset.from_tensor_slices(encode_train)
image_dataset = image_dataset.map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(64)

Extract the features and store as .npy files, save files in image path directory (for future reuse)

In [None]:
for img, path in tqdm(image_dataset):
    batch_features = image_features_extract_model(img)
    batch_features = tf.reshape(batch_features,
                              (batch_features.shape[0], -1, batch_features.shape[3]))

    for bf, p in zip(batch_features, path):
        path_of_feature = p.numpy().decode("utf-8")
        np.save(path_of_feature, bf.numpy())

100%|██████████| 127/127 [07:14<00:00,  3.42s/it]


In [None]:
''' building a vocabulary with top 500 unique words '''
top_k = 5000
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=top_k,
                                                 oov_token="<unk>",
                                                 filters='!"#$%&()*+.,-/:;=?@[\]^_`{|}~ ')

tokenizer.fit_on_texts(train_captions)
train_seqs = tokenizer.texts_to_sequences(train_captions)
tokenizer.word_index['<pad>'] = 0
tokenizer.index_word[0] = '<pad>'

train_seqs = tokenizer.texts_to_sequences(train_captions)
cap_vector = tf.keras.preprocessing.sequence.pad_sequences(train_seqs, padding='post')

Create training and test datasets using 80-20 split

In [None]:
img_name_train, img_name_val, cap_train, cap_val = train_test_split(img_name_vector,cap_vector, test_size=0.2, random_state=0)

Dataset for use in training the model

In [None]:
BATCH_SIZE = 64
BUFFER_SIZE = 1000
num_steps = len(img_name_train) // BATCH_SIZE

def map_func(img_name, cap):
    img_tensor = np.load(img_name.decode('utf-8')+'.npy')
    return img_tensor, cap

dataset = tf.data.Dataset.from_tensor_slices((img_name_train, cap_train))
dataset = dataset.map(lambda item1, item2: tf.numpy_function(map_func, [item1, item2], [tf.float32, tf.int32]),num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

In [None]:
def get_angles(pos, i, d_model):
    angle_rates = 1 / np.power(10000, (2 * (i//2)) / np.float32(d_model))
    return pos * angle_rates

def positional_encoding_1d(position, d_model):
    angle_rads = get_angles(np.arange(position)[:, np.newaxis],
                           np.arange(d_model)[np.newaxis, :],
                           d_model)

    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
    pos_encoding = angle_rads[np.newaxis, ...]
    return tf.cast(pos_encoding, dtype=tf.float32)

def positional_encoding_2d(row,col,d_model):
    assert d_model % 2 == 0
    row_pos = np.repeat(np.arange(row),col)[:,np.newaxis]
    col_pos = np.repeat(np.expand_dims(np.arange(col),0),row,axis=0).reshape(-1,1)

    angle_rads_row = get_angles(row_pos,np.arange(d_model//2)[np.newaxis,:],d_model//2)
    angle_rads_col = get_angles(col_pos,np.arange(d_model//2)[np.newaxis,:],d_model//2)

    angle_rads_row[:, 0::2] = np.sin(angle_rads_row[:, 0::2])
    angle_rads_row[:, 1::2] = np.cos(angle_rads_row[:, 1::2])
    angle_rads_col[:, 0::2] = np.sin(angle_rads_col[:, 0::2])
    angle_rads_col[:, 1::2] = np.cos(angle_rads_col[:, 1::2])
    pos_encoding = np.concatenate([angle_rads_row,angle_rads_col],axis=1)[np.newaxis, ...]
    return tf.cast(pos_encoding, dtype=tf.float32)

In [None]:
def create_padding_mask(seq):
    seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
    return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)

def create_look_ahead_mask(size):
    mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
    return mask  # (seq_len, seq_len)

def scaled_dot_product_attention(q, k, v, mask):
    matmul_qk = tf.matmul(q, k, transpose_b=True)  # (..., seq_len_q, seq_len_k)
    dk = tf.cast(tf.shape(k)[-1], tf.float32)
    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

    if mask is not None:
        scaled_attention_logits += (mask * -1e9) 

    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1) 
    output = tf.matmul(attention_weights, v)  # (..., seq_len_q, depth_v)

    return output, attention_weights

In [None]:
''' Main Attention class and feed forward method definition'''
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        assert d_model % self.num_heads == 0
        self.depth = d_model // self.num_heads
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)
        self.dense = tf.keras.layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, v, k, q, mask=None):
        batch_size = tf.shape(q)[0]
        q = self.wq(q)  
        k = self.wk(k) 
        v = self.wv(v)  

        q = self.split_heads(q, batch_size)  
        k = self.split_heads(k, batch_size)  
        v = self.split_heads(v, batch_size)  

        scaled_attention, attention_weights = scaled_dot_product_attention(q, k, v, mask)
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])  

        concat_attention = tf.reshape(scaled_attention,
                                 (batch_size, -1, self.d_model))  

        output = self.dense(concat_attention)  
        return output, attention_weights

def point_wise_feed_forward_network(d_model, dff):
     return tf.keras.Sequential([
                tf.keras.layers.Dense(dff, activation='relu'),  
                tf.keras.layers.Dense(d_model)])  



In [None]:
''' Encoder decoder layer defintion '''
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(EncoderLayer, self).__init__()
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)


    def call(self, x, training, mask=None):
        attn_output, _ = self.mha(x, x, x, mask)  
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)  

        ffn_output = self.ffn(out1)  
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)  
        return out2

class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(DecoderLayer, self).__init__()
        self.mha1 = MultiHeadAttention(d_model, num_heads)
        self.mha2 = MultiHeadAttention(d_model, num_heads)

        self.ffn = point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        self.dropout3 = tf.keras.layers.Dropout(rate)

    def call(self, x, enc_output, training,look_ahead_mask=None, padding_mask=None):
        attn1, attn_weights_block1 = self.mha1(x, x, x, look_ahead_mask)  
        attn1 = self.dropout1(attn1, training=training)
        out1 = self.layernorm1(attn1 + x)

        attn2, attn_weights_block2 = self.mha2(enc_output, enc_output, out1, padding_mask) 
        attn2 = self.dropout2(attn2, training=training)
        out2 = self.layernorm2(attn2 + out1)  

        ffn_output = self.ffn(out2)  
        ffn_output = self.dropout3(ffn_output, training=training)
        out3 = self.layernorm3(ffn_output + out2)  

        return out3, attn_weights_block1, attn_weights_block2


In [None]:
''' Encode and Decode class '''
class Encoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads, dff, row_size,col_size,rate=0.1):
        super(Encoder, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers

        self.embedding = tf.keras.layers.Dense(self.d_model,activation='relu')
        self.pos_encoding = positional_encoding_2d(row_size,col_size,self.d_model)

        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask=None):
        seq_len = tf.shape(x)[1]
        x = self.embedding(x)  
        x += self.pos_encoding[:, :seq_len, :]
        x = self.dropout(x, training=training)

        for i in range(self.num_layers):
            x = self.enc_layers[i](x, training, mask)

        return x  
    
class Decoder(tf.keras.layers.Layer):
    def __init__(self, num_layers,d_model,num_heads,dff, target_vocab_size, maximum_position_encoding,   rate=0.1):
        super(Decoder, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers

        self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
        self.pos_encoding = positional_encoding_1d(maximum_position_encoding, d_model)

        self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate)
                         for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, enc_output, training,look_ahead_mask=None, padding_mask=None):
        seq_len = tf.shape(x)[1]
        attention_weights = {}

        x = self.embedding(x)  
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :seq_len, :]
        x = self.dropout(x, training=training)

        for i in range(self.num_layers):
            x, block1, block2 = self.dec_layers[i](x, enc_output, training,
                                            look_ahead_mask, padding_mask)
         
            attention_weights['decoder_layer{}_block1'.format(i+1)] = block1
            attention_weights['decoder_layer{}_block2'.format(i+1)] = block2

        return x, attention_weights

In [None]:
class Transformer(tf.keras.Model):
    def __init__(self, num_layers, d_model, num_heads, dff,row_size,col_size,
               target_vocab_size,max_pos_encoding, rate=0.1):
        super(Transformer, self).__init__()
        self.encoder = Encoder(num_layers, d_model, num_heads, dff,row_size,col_size, rate)
        self.decoder = Decoder(num_layers, d_model, num_heads, dff,
                          target_vocab_size,max_pos_encoding, rate)
        self.final_layer = tf.keras.layers.Dense(target_vocab_size)

    def call(self, inp, tar, training,look_ahead_mask=None,dec_padding_mask=None,enc_padding_mask=None   ):
        enc_output = self.encoder(inp, training, enc_padding_mask)  
        dec_output, attention_weights = self.decoder(
        tar, enc_output, training, look_ahead_mask, dec_padding_mask)
        final_output = self.final_layer(dec_output)  
        return final_output, attention_weights

In [None]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super(CustomSchedule, self).__init__()
        self.d_model = d_model
        self.d_model = tf.cast(self.d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

In [None]:
class MyLRSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):

  def __init__(self, initial_learning_rate):
    self.initial_learning_rate = initial_learning_rate

  def __call__(self, step):
     return self.initial_learning_rate / (step + 1)

In [None]:
def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_sum(loss_)/tf.reduce_sum(mask)

In [None]:
def create_masks_decoder(tar):
    look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
    dec_target_padding_mask = create_padding_mask(tar)
    combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
    return combined_mask

In [None]:
''' various training steps including calculating the loss and accuracy'''
@tf.function
def train_step(img_tensor, tar):
   tar_inp = tar[:, :-1]
   tar_real = tar[:, 1:]
   dec_mask = create_masks_decoder(tar_inp)
   with tf.GradientTape() as tape:
      predictions, _ = transformer(img_tensor, tar_inp,True, dec_mask)
      loss = loss_function(tar_real, predictions)

   gradients = tape.gradient(loss, transformer.trainable_variables)   
   optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
   train_loss(loss)
   train_accuracy(tar_real, predictions)

In [None]:
''' define model evaluation'''
def evaluate(image):
    temp_input = tf.expand_dims(load_image(image)[0], 0)
    img_tensor_val = image_features_extract_model(temp_input)
    img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0], -1, img_tensor_val.shape[3]))
    start_token = tokenizer.word_index['<start>']
    end_token = tokenizer.word_index['<end>']
    decoder_input = [start_token]
    output = tf.expand_dims(decoder_input, 0) #tokens
    result = [] #word list

    for i in range(100):
        dec_mask = create_masks_decoder(output)
        predictions, attention_weights = transformer(img_tensor_val,output,False,dec_mask)
        predictions = predictions[: ,-1:, :]  # (batch_size, 1, vocab_size)
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
        if predicted_id == end_token:
            return result,tf.squeeze(output, axis=0), attention_weights
        result.append(tokenizer.index_word[int(predicted_id)])
        output = tf.concat([output, predicted_id], axis=-1)

    return result,tf.squeeze(output, axis=0), attention_weights

In [None]:
''' Validation with BLEU ''' 
def generate_sentence_bleu():
    rid = np.random.randint(0, len(img_name_val))
    print(rid)
    image = img_name_val[rid]
    real_caption = ' '.join([tokenizer.index_word[i] for i in cap_val[rid] if i not in [0]])
    caption,result,attention_weights = evaluate(image)

    first = real_caption.split(' ', 1)[1]
    real_caption = first.rsplit(' ', 1)[0]

    for i in caption:
        if i=="<unk>":
            caption.remove(i)

    for i in real_caption:
        if i=="<unk>":
            real_caption.remove(i)

    result_join = ' '.join(caption)
    result_final = result_join.rsplit(' ', 1)[0]
    real_appn = []
    real_appn.append(real_caption.split())
    reference = real_appn
    candidate = caption

    score = sentence_bleu(reference, candidate, weights=(1.0,0,0,0))
    print(f"BLEU-1 score: {score}")
    score = sentence_bleu(reference, candidate, weights=(0.5,0.5,0,0))
    print(f"BLEU-2 score: {score}")
    score = sentence_bleu(reference, candidate, weights=(0.3,0.3,0.3,0))
    print(f"BLEU-3 score: {score}")
    score = sentence_bleu(reference, candidate, weights=(0.25,0.25,0.25,0.25))
    print(f"BLEU-4 score: {score}")
    print ('Real Caption:', real_caption)
    print ('Predicted Caption:', ' '.join(caption))
    temp_image = np.array(Image.open(image))
    plt.imshow(temp_image)

In [None]:
def evaluate_modelx(num):

    print("Number=", num)

    actual, predicted = list(), list()
    for x in range(num):
        image = img_name_val[x]
        real_caption = ' '.join([tokenizer.index_word[i] for i in cap_val[x] if i not in [0]])
        caption, result, attention_weights = evaluate(image)

        first = real_caption.split(' ', 1)[1]
        real_caption = first.rsplit(' ', 1)[0]

        for i in caption:
            if i=="<unk>":
                 caption.remove(i)

        for i in real_caption:
            if i=="<unk>":
                real_caption.remove(i)

        result_join = ' '.join(caption)
        result_final = result_join.rsplit(' ', 1)[0]
        real_appn = []
        real_appn.append(real_caption.split())
        actual.append(real_appn)
        predicted.append(result_final.split())

    # calculate BLEU score
    print('BLEU-1: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
    print('BLEU-2: %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))
    print('BLEU-3: %f' % corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0)))
    print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))

In [None]:
def evaluate_model2(num):

    print("Corpus BLEU")

    actual, predicted = list(), list()
    for x in range(num):
        image = img_name_val[x]
        real_caption = ' '.join([tokenizer.index_word[i] for i in cap_val[x] if i not in [0]])
        caption, result, attention_weights = evaluate(image)

        first = real_caption.split(' ', 1)[1]
        real_caption = first.rsplit(' ', 1)[0]

        for i in caption:
            if i=="<unk>":
                caption.remove(i)

        for i in real_caption:
            if i=="<unk>":
                real_caption.remove(i)

        result_join = ' '.join(caption)
        result_final = result_join.rsplit(' ', 1)[0]
        real_appn = []
        real_appn.append(real_caption.split())
        reference = real_appn
        candidate = caption
        actual.append(reference)
        predicted.append(candidate)


    print('BLEU-1: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
    print('BLEU-2: %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))
    print('BLEU-3: %f' % corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0)))
    print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))

In [None]:
def run_model(results_df):
    for epoch in range(32):
        start = time.time()
        train_loss.reset_states()
        train_accuracy.reset_states()
        for (batch, (img_tensor, tar)) in enumerate(dataset):
            train_step(img_tensor, tar)
            if batch % 50 == 0:
                print ('Epoch {} Batch {} Loss {:.4f} Accuracy {:.4f}'.format(
                epoch + 1, batch, train_loss.result(), train_accuracy.result()))

            print ('Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(epoch + 1,
                                                  train_loss.result(),
                                                  train_accuracy.result()))
    #        data = [[batch,epoch+1,"{:.2f}".format(train_loss.result()),"{:.2f}".format(train_accuracy.result()),opt,lr,beta1,beta2,eps,dec,init_acc] ]
    #        df = pd.DataFrame(data, columns = ["Batch", "Epoch", "Train Loss", "Train Accuracy","Opt","Learn Rate","beta_1","beta_2","epsilon","decay","init_acc"])

            data = [batch]
            my_df= pd.DataFrame(data=data, columns=["Batch"])
            my_df["Epoch"] = epoch + 1
            my_df["Train Loss"] = "{:.2f}".format(train_loss.result())
            my_df["Train Accuracy"] = "{:.2f}".format(train_accuracy.result())
            my_df["Optimizer"] = opt
            my_df["beta_1"] = beta1
            my_df["beta_2"] = beta2
            my_df["Epsilon"] = eps
            my_df["Decay"] = dec
            my_df["Initial Accumulator"] = init_acc
            my_df["Num Layers"] = num_layer
            my_df["D Model"] = d_model

            results_df = results_df.append(my_df, ignore_index=True)

        print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))
    return results_df

Below we have multiple runs with various hyperparameters tuning

In [None]:
''' Run ----1---- 
Various transformer parameters'''

num_layer = 2
d_model = 512
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR1_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)


**Output from script above:**

6465

BLEU-1 score: 0.21428571428571425
BLEU-2 score: 0.12838814775327384
BLEU-3 score: 0.29181993744070167
BLEU-4 score: 0.35831291876413535

Real Caption: man crouching on rocks by the ocean shore

Predicted Caption: man in green shirt climbing on rocks while water rushes over them at night

73

BLEU-1 score: 0.5
BLEU-2 score: 0.26726124191242434
BLEU-3 score: 0.45306612226568754
BLEU-4 score: 0.5169731539571706

Real Caption: two little white dogs running

Predicted Caption: two white dogs are running towards each other

4205

BLEU-1 score: 0.4
BLEU-2 score: 0.21081851067789192
BLEU-3 score: 0.3929571945508051
BLEU-4 score: 0.4591497693322865

Real Caption: man and two children in boat on the water

Predicted Caption: man and woman in life jackets paddling rowboat across water

521

BLEU-1 score: 0.23884377019126307
BLEU-2 score: 0.1462613413027904
BLEU-3 score: 0.27616701141841127
BLEU-4 score: 0.323729563941832

Real Caption: black dog runs out of the water with stick in its mouth

Predicted Caption: black dog is splashing through soggy grass carrying stick

BLEU-1: 0.319463
BLEU-2: 0.190464
BLEU-3: 0.145145
BLEU-4: 0.066649

In [None]:
''' Run ----2---- 
Various transformer parameters'''
num_layer = 3
d_model = 512
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR2_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)


**Output from script above:**

2732

BLEU-1 score: 0.6
BLEU-2 score: 0.3872983346207417
BLEU-3 score: 0.566014266387059
BLEU-4 score: 0.6223329772884784

Real Caption: man running with ball

Predicted Caption: man with ball and defenders

1155

BLEU-1 score: 0.4166666666666667
BLEU-2 score: 0.19462473604038075
BLEU-3 score: 0.37455777968751125
BLEU-4 score: 0.44116293593227063

Real Caption: white dog and black dog holding toy between them in their mouths

Predicted Caption: black and white dog is playing with pull toy next to person

6053

BLEU-1 score: 0.15384615384615385
BLEU-2 score: 0.3922322702763681
BLEU-3 score: 0.5703297064170131
BLEU-4 score: 0.6262844962765469

Real Caption: woman stands at the edge of cliff overlooking the mountains

Predicted Caption: woman in pink shirt and khaki shorts looks over crumbling 
rock overlooking forest

1049

BLEU-1 score: 0.3333333333333333
BLEU-2 score: 0.28005601680560194
BLEU-3 score: 0.20282008004191915
BLEU-4 score: 0.26460159523593296

Real Caption: dog is standing on metal beam <unk> looking at something

Predicted Caption: black and brown dog is wearing blue color and is standing on the water looking at the camera

BLEU-1: 0.306444
BLEU-2: 0.170097
BLEU-3: 0.125684
BLEU-4: 0.058797

In [None]:
''' Run ----3---- 
Various transformer parameters'''

num_layer = 4
d_model = 512
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR3_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)


**Output from script above**

4739

BLEU-1 score: 0.3333333333333333
BLEU-2 score: 0.1740776559556978
BLEU-3 score: 0.35030458207263226
BLEU-4 score: 0.41722614486115056

Real Caption: of young girl with pink shirt laying on grass

Predicted Caption: girl in pink top and pink shirt is running through flowered grass

7755

BLEU-1 score: 0.04555807961771006
BLEU-2 score: 0.1205353488781414
BLEU-3 score: 0.1778828328977971
BLEU-4 score: 0.19605997334125028

Real Caption: man holding plastic toys looks away while man wearing white hat smiles at the camera

Predicted Caption: two men one with beard at carnival

6934

BLEU-1 score: 0.46153846153846156
BLEU-2 score: 0.2773500981126146
BLEU-3 score: 0.22563121849503306
BLEU-4 score: 0.2891784933232572

Real Caption: little boy in green goggles and white life jacket jumps into the water

Predicted Caption: the person in red jacket and black pants is jumping into the water
4003

BLEU-1 score: 0.5
BLEU-2 score: 0.408248290463863
BLEU-3 score: 0.3130600345147708
BLEU-4 score: 0.37991784282579627

Real Caption: there are women wearing pink energizer bunny ears

Predicted Caption: women wearing yellow energizer bunny ears point to the right

Corpus BLEU
BLEU-1: 0.293333
BLEU-2: 0.169795
BLEU-3: 0.120139
BLEU-4: 0.039858

*italicized text*

In [None]:
''' Run ----5---- 
Various transformer parameters'''

num_layer = 5
d_model = 512
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR4_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)


**Output from script above**

6053

BLEU-1 score: 0.0909090909090909
BLEU-2 score: 0.30151134457776363
BLEU-3 score: 0.48705969722582854
BLEU-4 score: 0.5491004867761125

Real Caption: woman stands at the edge of cliff overlooking the mountains

Predicted Caption: woman in yellow shirt is standing on monument above crumbling rock

1988

BLEU-1 score: 0.036576998645322836
BLEU-2 score: 0.10973099593596852
BLEU-3 score: 0.17028556036455708
BLEU-4 score: 0.19005966012623146

Real Caption: the man wearing the brown jacket is holding cigarette and the man behind him is holding can of beer

Predicted Caption: two men are fencing one has body of them

6966

BLEU-1 score: 0.2727272727272727
BLEU-2 score: 0.1651445647689541
BLEU-3 score: 0.33940526177416436
BLEU-4 score: 0.4063798282013443

Real Caption: tan dog shaking the water off of himself

Predicted Caption: dog with red collar is playing with stick in the water

3137

BLEU-1 score: 0.0909090909090909
BLEU-2 score: 0.30151134457776363
BLEU-3 score: 0.48705969722582854
BLEU-4 score: 0.5491004867761125

Real Caption: group of six children sit at wooden table

Predicted Caption: three young boys are sitting in front of large metal containers

Corpus BLEU
BLEU-1: 0.247765
BLEU-2: 0.134768
BLEU-3: 0.096628
BLEU-4: 0.034908

In [None]:
''' Run ----6---- 
Various transformer parameters'''

num_layer = 2
d_model = 256
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR5_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

5615

BLEU-1 score: 0.3333333333333333
BLEU-2 score: 0.5773502691896257
BLEU-3 score: 0.7192230933248643
BLEU-4 score: 0.7598356856515925

Real Caption: two brown dogs race on track

Predicted Caption: two dogs run in the mud

2883

BLEU-1 score: 0.13333333333333333
BLEU-2 score: 0.09759000729485329
BLEU-3 score: 0.24753875953635443
BLEU-4 score: 0.31239399369202553

Real Caption: man with head covering

Predicted Caption: white man with red hat and white sport jacket is staring while people look on

1740

BLEU-1 score: 0.13381535712974757
BLEU-2 score: 0.26763071425949514
BLEU-3 score: 0.3531408446312039
BLEU-4 score: 0.37848698581337653

Real Caption: child riding wheeled yellow toy with adult legs and another child in background

Predicted Caption: little girl rides on yellow plastic toy car

3205

BLEU-1 score: 0.5454545454545454
BLEU-2 score: 0.33028912953790823
BLEU-3 score: 0.5144421780318993
BLEU-4 score: 0.5747078645171895

Real Caption: girl in blue shirt leaps into the air

Predicted Caption: the girl is wearing blue shirt and jumping in the air

Corpus BLEU
BLEU-1: 0.315438
BLEU-2: 0.182538
BLEU-3: 0.142971
BLEU-4: 0.071384

In [None]:
''' Run ----7---- 
Various transformer parameters'''

num_layer = 3
d_model = 256
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR6_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**output from script above**

635

BLEU-1 score: 0.21368332584053448
BLEU-2 score: 0.18159590803171513
BLEU-3 score: 0.16970042197194982
BLEU-4 score: 0.22516517620830942

Real Caption: an american footballer in white and purple strip is making run with the ball

Predicted Caption: football player runs with the ball chased by members of the opposing team

4351

BLEU-1 score: 0.17113903967753066
BLEU-2 score: 0.29642151188002913
BLEU-3 score: 0.3692614485165609
BLEU-4 score: 0.39011264866539486

Real Caption: large furry dog walking in the sand near large rocks

Predicted Caption: black dog walks on the beach

1639

BLEU-1 score: 0.3
BLEU-2 score: 0.5477225575051661
BLEU-3 score: 0.696845301935949
BLEU-4 score: 0.7400828044922853

Real Caption: two girls giving large dog bath

Predicted Caption: two little girls in summer clothes are washing brown dog

1815

BLEU-1 score: 0
BLEU-2 score: 0
BLEU-3 score: 0
BLEU-4 score: 0

Real Caption: the white and brown dog are playing in the dead grass

Predicted Caption: two dogs play together

Corpus BLEU
BLEU-1: 0.293040
BLEU-2: 0.163002
BLEU-3: 0.119346
BLEU-4: 0.048955

In [None]:
''' Run ----8---- 
Various transformer parameters'''

num_layer = 4
d_model = 256
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR7_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

3857

BLEU-1 score: 0.1875
BLEU-2 score: 0.4330127018922193
BLEU-3 score: 0.6052020377096727
BLEU-4 score: 0.6580370064762462

Real Caption: man with backpack sits on fallen log by the river

Predicted Caption: man in blue shirt and backpack is standing in front of rock formation in the woods

4754

BLEU-1 score: 0.2857142857142857
BLEU-2 score: 0.5345224838248488
BLEU-3 score: 0.6867198272427282
BLEU-4 score: 0.7311104457090247

Real Caption: man looks across city street at night

Predicted Caption: man stands at busy bus stop light

2772

BLEU-1 score: 0.15789473684210523
BLEU-2 score: 0.09365858115816939
BLEU-3 score: 0.24150634461947593
BLEU-4 score: 0.30603689509300896

Real Caption: young child is standing alone on some jagged rocks

Predicted Caption: boy in red shirt is standing on top of rock with smile and ice fishing the water behind him

7921

BLEU-1 score: 0.06666666666666667
BLEU-2 score: 0.25819888974716115
BLEU-3 score: 0.44378500343167504
BLEU-4 score: 0.5081327481546147

Real Caption: skateboarder slides down handrail on his feet

Predicted Caption: boy in blue shirt and blue shoes attempts trick off of steps with his skateboard

Corpus BLEU
BLEU-1: 0.294974
BLEU-2: 0.167073
BLEU-3: 0.112043
BLEU-4: 0.049360

In [None]:
# Transformer parameters
num_layer = 5
d_model = 256
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR8_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

5654

BLEU-1 score: 0.6666666666666666
BLEU-2 score: 0.6324555320336759
BLEU-3 score: 0.6170338627200096
BLEU-4 score: 0.5081327481546147

Real Caption: collie running through the snow

Predicted Caption: dog is running through the snow

6389

BLEU-1 score: 0
BLEU-2 score: 0
BLEU-3 score: 0
BLEU-4 score: 0

Real Caption: water splashing up against something

Predicted Caption: surfer rides wave under the bright blue sky

1008

BLEU-1 score: 0.1533407357715539
BLEU-2 score: 0.3756065594232468
BLEU-3 score: 0.53748137319499
BLEU-4 score: 0.5878560343277044

Real Caption: two little kids in strollers have face makeup on like the band kiss

Predicted Caption: two women in school uniforms are holding up two flags at carnival

7781

BLEU-1 score: 0.15384615384615385
BLEU-2 score: 0.3922322702763681
BLEU-3 score: 0.5703297064170131
BLEU-4 score: 0.6262844962765469

Real Caption: woman sitting on the beach under pink lawn umbrella

Predicted Caption: woman in blue bathing suit is about to hit ball into the water

Corpus BLEU
BLEU-1: 0.293168
BLEU-2: 0.158293
BLEU-3: 0.115683
BLEU-4: 0.050224

In [None]:
# Transformer parameters
num_layer = 2
d_model = 784
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR9_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

6829

BLEU-1 score: 0.17113903967753066
BLEU-2 score: 0.29642151188002913
BLEU-3 score: 0.3692614485165609
BLEU-4 score: 0.39011264866539486

Real Caption: four people and baby are crossing the street at crosswalk

Predicted Caption: group of people walking across street

5195

BLEU-1 score: 0.12197089792217974
BLEU-2 score: 0.22268737383712614
BLEU-3 score: 0.28331615729993387
BLEU-4 score: 0.3008952140022968

Real Caption: show jumper wearing blue helmet rides white horse over wooden fence that is decorated with red and yellow flowers

Predicted Caption: rider on horse is in the middle of jump fence

283

BLEU-1 score: 0
BLEU-2 score: 0
BLEU-3 score: 0
BLEU-4 score: 0

Real Caption: man on motorcycle is jumping over dirt hill in front of spectators

Predicted Caption: motocross rider wearing blue uniform flies through the air

5082

BLEU-1 score: 0.2857142857142857
BLEU-2 score: 0.5345224838248488
BLEU-3 score: 0.6867198272427282
BLEU-4 score: 0.7311104457090247

Real Caption: little girl is petting golden dog

Predicted Caption: small girl gives kiss to tan dog

Corpus BLEU
BLEU-1: 0.321845
BLEU-2: 0.184620
BLEU-3: 0.142495
BLEU-4: 0.069751

In [None]:
# Transformer parameters
num_layer = 3
d_model = 784
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR10_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

1387

BLEU-1 score: 0.1972775711532852
BLEU-2 score: 0.22056302976739864
BLEU-3 score: 0.23062942776415837
BLEU-4 score: 0.23321700945343304

Real Caption: black and white dog is running though water whilst bearing its teeth

Predicted Caption: black dog running through water

7795

BLEU-1 score: 0.4
BLEU-2 score: 0.21081851067789192
BLEU-3 score: 0.3929571945508051
BLEU-4 score: 0.4591497693322865

Real Caption: black cow and brown cow being chased by dog

Predicted Caption: brown and black cow jumps away from small black bull

6380

BLEU-1 score: 0.14285714285714285
BLEU-2 score: 0.3779644730092272
BLEU-3 score: 0.5577898253032461
BLEU-4 score: 0.6147881529512643

Real Caption: five people in gym

Predicted Caption: man in black shirt stares at swordsman

6070

BLEU-1 score: 0.10510841176326924
BLEU-2 score: 0.1966398326430567
BLEU-3 score: 0.2526301062874043
BLEU-4 score: 0.26896050220204015

Real Caption: closeup of white bunny with another white bunny and black horse in the background

Predicted Caption: two white sit on the green grass

Corpus BLEU
BLEU-1: 0.244681
BLEU-2: 0.136254
BLEU-3: 0.099739
BLEU-4: 0.045904

In [None]:
# Transformer parameters
num_layer = 4
d_model = 784
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR11_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

3319

BLEU-1 score: 0
BLEU-2 score: 0
BLEU-3 score: 0
BLEU-4 score: 0

Real Caption: pigeons framed against looming storm

Predicted Caption: group of birds are lined up in the air

2814

BLEU-1 score: 0.0909090909090909
BLEU-2 score: 0.30151134457776363
BLEU-3 score: 0.48705969722582854
BLEU-4 score: 0.5491004867761125

Real Caption: three women in saris dance near large flag

Predicted Caption: group of people are sitting on the edge of large rock

5461

BLEU-1 score: 0.20072303569462135
BLEU-2 score: 0.32777934471618453
BLEU-3 score: 0.3988185306688956
BLEU-4 score: 0.4188647040415359

Real Caption: baseball player is making play nearby large sign and boundary of the field

Predicted Caption: the man is wearing red and white shirt

6870

BLEU-1 score: 0.2952290954631342
BLEU-2 score: 0.19961130092025536
BLEU-3 score: 0.1645801126902395
BLEU-4 score: 0.19619788401571467

Real Caption: man wearing an orange coat is snowboarding in the air after going over hill

Predicted Caption: man is snowboarding in midair on the snow

Corpus BLEU
BLEU-1: 0.197970
BLEU-2: 0.090832
BLEU-3: 0.050337
BLEU-4: 0.081873


In [None]:
# Transformer parameters
num_layer = 2
d_model = 128
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR12_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**


6562

BLEU-1 score: 0.2207276647028654
BLEU-2 score: 0.14247909490690874
BLEU-3 score: 0.20822501201353516
BLEU-4 score: 0.22894350790744533

Real Caption: two people leaning against railing outside the <unk> of building

Predicted Caption: two people stand outside library

3587

BLEU-1 score: 0.0909090909090909
BLEU-2 score: 0.30151134457776363
BLEU-3 score: 0.48705969722582854
BLEU-4 score: 0.5491004867761125

Real Caption: the sooners quarterback is waiting for the snap from his center

Predicted Caption: football player in red winter gear is surrounded by football players

4847

BLEU-1 score: 0.5
BLEU-2 score: 0.316227766016838
BLEU-3 score: 0.5011872336272724
BLEU-4 score: 0.5623413251903491

Real Caption: man sits at table in room

Predicted Caption: man sits in diner reading photograph

6286

BLEU-1 score: 0.44485411273156
BLEU-2 score: 0.29841721084143685
BLEU-3 score: 0.4428843827462494
BLEU-4 score: 0.4888290318657942

Real Caption: the girl is swimming with only her head above the water

Predicted Caption: girl is in the water and splashing around her

Corpus BLEU
BLEU-1: 0.314415
BLEU-2: 0.195441
BLEU-3: 0.145616
BLEU-4: 0.069956


In [None]:
# Transformer parameters
num_layer = 3
d_model = 128
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR13_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

6780

BLEU-1 score: 0.07124226538110606
BLEU-2 score: 0.2137267961433182
BLEU-3 score: 0.33167098262211875
BLEU-4 score: 0.3701856698591431

Real Caption: tan dog walks along grassy path with his long pink tongue hanging out

Predicted Caption: brown dog is running through field of tall grass

7216

BLEU-1 score: 0.5
BLEU-2 score: 0.447213595499958
BLEU-3 score: 0.4070905315369044
BLEU-4 score: 0.4728708045015879

Real Caption: biker performs trick in the air

Predicted Caption: man on bicycle in the air

6536

BLEU-1 score: 0.42857142857142855
BLEU-2 score: 0.26726124191242434
BLEU-3 score: 0.4530661222656876
BLEU-4 score: 0.5169731539571706

Real Caption: skateboarder wearing green shirt jumping on ramp

Predicted Caption: skateboarder in green shirt is performing jump

6937

BLEU-1 score: 0.36653744525105364
BLEU-2 score: 0.20392260927133252
BLEU-3 score: 0.16085956370983176
BLEU-4 score: 0.2045814954299735

Real Caption: group of people standing on snow covered ground some are holding brooms there is dog with the group

Predicted Caption: group of people in red and black walking through the snow with dog

Corpus BLEU
BLEU-1: 0.307307
BLEU-2: 0.180690
BLEU-3: 0.133195
BLEU-4: 0.054294

In [None]:
# Transformer parameters
num_layer = 4
d_model = 128
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR14_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

1892

BLEU-1 score: 0.28496906152442425
BLEU-2 score: 0.4274535922866364
BLEU-3 score: 0.5027192029911277
BLEU-4 score: 0.5235215949109693

Real Caption: two children wearing white capes are running through <unk> out path in field

Predicted Caption: two people are walking through field of grass and

6125

BLEU-1 score: 0.27715355752967225
BLEU-2 score: 0.14284167402631745
BLEU-3 score: 0.1117165042969848
BLEU-4 score: 0.14738196645255086

Real Caption: man is standing with his arms folded looking at something while the person behind him is jumping onto one of the two beds in the room

Predicted Caption: man in red shirt is laying on the bed with his arms stretched out to the side

531

BLEU-1 score: 0.3
BLEU-2 score: 0.5477225575051661
BLEU-3 score: 0.696845301935949
BLEU-4 score: 0.7400828044922853

Real Caption: woman with brown hair and brown eyes wearing green shirt

Predicted Caption: woman in blue shirt and orange glasses is her sunglasses

2889

BLEU-1 score: 0.27973809117540177
BLEU-2 score: 0.2136534962622895
BLEU-3 score: 0.2976746589772035
BLEU-4 score: 0.3234073084059581

Real Caption: two dogs fight over stick on grassy field lake in the background

Predicted Caption: two dogs are playing in the water

Corpus BLEU
BLEU-1: 0.274017
BLEU-2: 0.155493
BLEU-3: 0.113558
BLEU-4: 0.056711

In [None]:
# Transformer parameters
num_layer = 2
d_model = 1024
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR15_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

185

BLEU-1 score: 0.15384615384615385
BLEU-2 score: 0.3922322702763681
BLEU-3 score: 0.5703297064170131
BLEU-4 score: 0.6262844962765469

Real Caption: kid playing in swimming pool

Predicted Caption: boy in blue bathing suit dives into pool while two adults look on

4990

BLEU-1 score: 0.35826565528689464
BLEU-2 score: 0.5066641486392106
BLEU-3 score: 0.5820042740778344
BLEU-4 score: 0.6025286104785453

Real Caption: the man wearing brown shirt clings to rock

Predicted Caption: man climbs the bottom of rock

703

BLEU-1 score: 0.0588235294117647
BLEU-2 score: 0.24253562503633297
BLEU-3 score: 0.42743031778825946
BLEU-4 score: 0.4924790605054523

Real Caption: mom is helping her son to rollerblade

Predicted Caption: an ice skating with little boy in red shirt and orange shirt is holding an orange shirt

494
BLEU-1 score: 0.3274923012311928
BLEU-2 score: 0.5178107940302672
BLEU-3 score: 0.6219551968890802
BLEU-4 score: 0.6511126026643229

Real Caption: black puppy is biting tree limb

Predicted Caption: puppy chewing on tree branch

Corpus BLEU
BLEU-1: 0.323208
BLEU-2: 0.189167
BLEU-3: 0.147405
BLEU-4: 0.066983

In [None]:
# Transformer parameters
num_layer = 3
d_model = 1024
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR16_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**output from script above**

1670

BLEU-1 score: 0.13333333333333333
BLEU-2 score: 0.36514837167011077
BLEU-3 score: 0.5463634277011612
BLEU-4 score: 0.6042750794713536

Real Caption: small dog with long hair holds snowman toy in its mouth

Predicted Caption: the little boy in the red and black collar is running with people behind him

1221

BLEU-1 score: 0.029999999999999995
BLEU-2 score: 0.17320508075688773
BLEU-3 score: 0.34924996914343953
BLEU-4 score: 0.4161791450287817

Real Caption: two young adults are setting up the video camera for some action

Predicted Caption: two people are standing around on the side of road and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt and dirt

6102

BLEU-1 score: 0.2196642817631056
BLEU-2 score: 0.1521878786487298
BLEU-3 score: 0.1895851490671745
BLEU-4 score: 0.20029051217596075

Real Caption: two dogs one tan and one black play on the grass near the pond

Predicted Caption: two dogs play in the grass

7160

BLEU-1 score: 0.08333333333333333
BLEU-2 score: 0.28867513459481287
BLEU-3 score: 0.4745102806263551
BLEU-4 score: 0.537284965911771

Real Caption: two children run towards the lagoon

Predicted Caption: man and woman are standing on the beach looking at the sky

Corpus BLEU
BLEU-1: 0.205915
BLEU-2: 0.105566
BLEU-3: 0.076198
BLEU-4: 0.033261

In [None]:
# Transformer parameters
num_layer = 4
d_model = 1024
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR17_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

2581

BLEU-1 score: 0.21729910425353913
BLEU-2 score: 0.1374320205911133
BLEU-3 score: 0.21781507386103105
BLEU-4 score: 0.24439253249722204

Real Caption: three black dogs and white dog are standing in the snow

Predicted Caption: two dogs run through the snow

6332

BLEU-1 score: 0.2222222222222222
BLEU-2 score: 0.4714045207910317
BLEU-3 score: 0.6368486695666434
BLEU-4 score: 0.6865890479690392

Real Caption: brown dog running with long stick in his mouth

Predicted Caption: black and white dog is running through grassy field

3620

BLEU-1 score: 0
BLEU-2 score: 0
BLEU-3 score: 0
BLEU-4 score: 0

Real Caption: small dog in costume stands on hind legs to reach dangling flowers

Predicted Caption: two dogs are playing with each other

3370

BLEU-1 score: 0.3436446393954861
BLEU-2 score: 0.18368578620276968
BLEU-3 score: 0.31138748841660685
BLEU-4 score: 0.355310106137518

Real Caption: person in yellow slicker is driving motorboat next to the beach

Predicted Caption: man in red shorts is on the beach

Corpus BLEU
BLEU-1: 0.156364
BLEU-2: 0.076062
BLEU-3: 0.059788
BLEU-4: 0.023660

In [None]:
# Transformer parameters
num_layer = 2
d_model = 2048
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR18_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

3638

BLEU-1 score: 0.17113903967753066
BLEU-2 score: 0.29642151188002913
BLEU-3 score: 0.3692614485165609
BLEU-4 score: 0.39011264866539486

Real Caption: girl is climbing rock wall

Predicted Caption: rock climber ascends

2496

BLEU-1 score: 0.2207276647028654
BLEU-2 score: 0.14247909490690874
BLEU-3 score: 0.20822501201353516
BLEU-4 score: 0.22894350790744533

Real Caption: young girl is blowing bubbles with an orange bubble wand

Predicted Caption: small girl blowing bubbles outside

3156

BLEU-1 score: 0.4444444444444444
BLEU-2 score: 0.23570226039551587
BLEU-3 score: 0.42016342872918955
BLEU-4 score: 0.48549177170732344

Real Caption: the man is wearing white shirt and sunglasses

Predicted Caption: man with hat and sunglasses is holding paper cup

2758

BLEU-1 score: 0.25
BLEU-2 score: 0.5
BLEU-3 score: 0.6597539553864471
BLEU-4 score: 0.7071067811865476

Real Caption: people are playing in water fountains

Predicted Caption: kids are standing in the sprinklers getting soaked

Corpus BLEU
BLEU-1: 0.257290
BLEU-2: 0.143157
BLEU-3: 0.106531
BLEU-4: 0.046010

In [None]:
# Transformer parameters
num_layer = 3
d_model = 2048
dff = 2048
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR19_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**output from script above**

6954

BLEU-1 score: 0.20980356838155134
BLEU-2 score: 0.32048024439343425
BLEU-3 score: 0.3796615532749524
BLEU-4 score: 0.3960914423407551

Real Caption: there are four smiling boys with smiles on playground with their ball

Predicted Caption: two young boys play with soccer ball
6499

BLEU-1 score: 0.07142857142857141
BLEU-2 score: 0.26726124191242434
BLEU-3 score: 0.45306612226568754
BLEU-4 score: 0.5169731539571706

Real Caption: woman in wheelchair and toddler at park

Predicted Caption: the boy is wearing blue shirt and blue shirt is jumping off of hay

625

BLEU-1 score: 0.16666666666666669
BLEU-2 score: 0.408248290463863
BLEU-3 score: 0.5841906810678655
BLEU-4 score: 0.6389431042462724

Real Caption: brown dog sits still on hillside

Predicted Caption: dog is running through the woods

1143

BLEU-1 score: 0
BLEU-2 score: 0
BLEU-3 score: 0
BLEU-4 score: 0

Real Caption: little girl in brown shirt and <unk> skirt dances on wood floor

Predicted Caption: two young girls are holding stuffed animals

Corpus BLEU
BLEU-1: 0.179539
BLEU-2: 0.098847
BLEU-3: 0.073342
BLEU-4: 0.035335


In [None]:
# Transformer parameters
num_layer = 2
d_model = 784
dff = 4096
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR20_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

352

BLEU-1 score: 0.45489799478447507
BLEU-2 score: 0.30326532985631666
BLEU-3 score: 0.40016100180856107
BLEU-4 score: 0.4288819424803534

Real Caption: three dogs playing in yard together

Predicted Caption: three dogs play together

3618

BLEU-1 score: 0.2222222222222222
BLEU-2 score: 0.16666666666666669
BLEU-3 score: 0.34127875184653655
BLEU-4 score: 0.408248290463863

Real Caption: group of people gather around truck

Predicted Caption: crowd of people in knit hats standing near van

5813

BLEU-1 score: 0.17794164509262397
BLEU-2 score: 0.3774712317014531
BLEU-3 score: 0.5099485497198184
BLEU-4 score: 0.5497775311418521

Real Caption: man is walking down dirt road with shop on his right

Predicted Caption: man walks down an empty street next to market

797

BLEU-1 score: 0
BLEU-2 score: 0
BLEU-3 score: 0
BLEU-4 score: 0

Real Caption: cricketer wielding wears white suit and black helmet with face guard

Predicted Caption: cricket player on field swinging bat

Corpus BLEU
BLEU-1: 0.310949
BLEU-2: 0.176910
BLEU-3: 0.130282
BLEU-4: 0.055606

In [None]:
# Transformer parameters
num_layer = 2
d_model = 1024
dff = 4096
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR21_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

3351

BLEU-1 score: 0.25
BLEU-2 score: 0.5
BLEU-3 score: 0.6597539553864471
BLEU-4 score: 0.7071067811865476

Real Caption: young boy is looking through binoculars at playground

Predicted Caption: boy in hat and red shirt is holding camera near playground equipment

7896

BLEU-1 score: 0.06666666666666667
BLEU-2 score: 0.25819888974716115
BLEU-3 score: 0.44378500343167504
BLEU-4 score: 0.5081327481546147

Real Caption: busy street full of cars and bike riders

Predicted Caption: five people on bikes in traffic with man watching from the side of the road

7695

BLEU-1 score: 0.07581633246407919
BLEU-2 score: 0.21444097124017672
BLEU-3 score: 0.32503173264731566
BLEU-4 score: 0.36064528799877893

Real Caption: man is slouching on couch with people walking and sitting around him

Predicted Caption: man wearing hat sleeping in large brown chair

3686

BLEU-1 score: 0.3333333333333333
BLEU-2 score: 0.20412414523193154
BLEU-3 score: 0.3854221125344267
BLEU-4 score: 0.45180100180492244

Real Caption: two dogs are wrestling over piece of black material

Predicted Caption: two dogs holding single black frisbee in their mouths

Corpus BLEU
BLEU-1: 0.307629
BLEU-2: 0.175931
BLEU-3: 0.128278
BLEU-4: 0.054852

In [None]:
# Transformer parameters
num_layer = 2
d_model = 784
dff = 5120
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR22_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

3563

BLEU-1 score: 0.3980729503187718
BLEU-2 score: 0.1888225796865554
BLEU-3 score: 0.3219040383797589
BLEU-4 score: 0.3678278000487881

Real Caption: an elderly woman in red jacket is walking up hilly city street

Predicted Caption: an elderly lady crosses the street in the city

5148

BLEU-1 score: 0.17573142541048448
BLEU-2 score: 0.13612097680401944
BLEU-3 score: 0.17730921639529523
BLEU-4 score: 0.18942307125336327

Real Caption: the white dog is hard to see in the snow as it runs <unk>

Predicted Caption: white dog walks through the snow

3912

BLEU-1 score: 0.36363636363636365
BLEU-2 score: 0.26967994498529685
BLEU-3 score: 0.4555218258274121
BLEU-4 score: 0.5193071778680676

Real Caption: two dogs are running together through the grass

Predicted Caption: two dogs on the grass walking in front of brown dog

1890

BLEU-1 score: 0.23618327637050734
BLEU-2 score: 0.33401359264888447
BLEU-3 score: 0.3836808644224233
BLEU-4 score: 0.39721134088567395

Real Caption: the brown dog splashes through the water

Predicted Caption: dog plays in water

Corpus BLEU
BLEU-1: 0.296181
BLEU-2: 0.168487
BLEU-3: 0.127941
BLEU-4: 0.055064

In [None]:
# Transformer parameters
num_layer = 2
d_model = 1024
dff = 5120
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR23_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

978

BLEU-1 score: 0.023161150407600514
BLEU-2 score: 0.0401162892677087
BLEU-3 score: 0.04997410272335065
BLEU-4 score: 0.052796005801316395

Real Caption: young boy standing with raised leg in large pile of leaves

Predicted Caption: boy kicks mud

1048

BLEU-1 score: 0.2
BLEU-2 score: 0.447213595499958
BLEU-3 score: 0.6170338627200097
BLEU-4 score: 0.668740304976422

Real Caption: snowboarder jumps through the air on snowy hill

Predicted Caption: someone in yellow pants is on ramp over the snow

7414

BLEU-1 score: 0.2222222222222222
BLEU-2 score: 0.4714045207910317
BLEU-3 score: 0.6368486695666434
BLEU-4 score: 0.6865890479690392

Real Caption: girls dance in colorful outfits

Predicted Caption: women in outfits are performing on low cut field

1941

BLEU-1 score: 0
BLEU-2 score: 0
BLEU-3 score: 0
BLEU-4 score: 0

Real Caption: this person seated on type of is riding downhill

Predicted Caption: two people are walking down forest path

Corpus BLEU
BLEU-1: 0.307673
BLEU-2: 0.180198
BLEU-3: 0.143205
BLEU-4: 0.071393

In [None]:
# Transformer parameters
num_layer = 2
d_model = 784
dff = 8192
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR24_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

5788

BLEU-1 score: 0.09942659075715217
BLEU-2 score: 0.2982797722714565
BLEU-3 score: 0.46288414438794134
BLEU-4 score: 0.5166357204442371

Real Caption: man performing trick on skateboard at graffiti covered skate park

Predicted Caption: boy is making his skateboard jump over white candles

1640

BLEU-1 score: 0.20980356838155134
BLEU-2 score: 0.13083551190106055
BLEU-3 score: 0.22179474138297817
BLEU-4 score: 0.25307989573458556

Real Caption: basketball player with on jersey jumps to make shot crowd in background

Predicted Caption: basketball player preparing to shoot the ball

4929

BLEU-1 score: 0.4
BLEU-2 score: 0.21081851067789192
BLEU-3 score: 0.3929571945508051
BLEU-4 score: 0.4591497693322865

Real Caption: boy in an orange jersey is about to pitch baseball
Predicted Caption: the boy in the orange shirt is throwing the ball

4720

BLEU-1 score: 0.2920502936517768
BLEU-2 score: 0.18025735467152731
BLEU-3 score: 0.3236729066119438
BLEU-4 score: 0.3746792881553041

Real Caption: man in swim trunks and red life vest jet skiing

Predicted Caption: man wearing red life jacket jumping into water

Corpus BLEU
BLEU-1: 0.298737
BLEU-2: 0.164432
BLEU-3: 0.124708
BLEU-4: 0.050601

In [None]:
# Transformer parameters
num_layer = 2
d_model = 1024
dff = 8192
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR25_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**
 
1538

BLEU-1 score: 0
BLEU-2 score: 0
BLEU-3 score: 0
BLEU-4 score: 0

Real Caption: person in an airplane

Predicted Caption: small white plane with spinning ground pilot seen through windows

6176

BLEU-1 score: 0.36193496721438384
BLEU-2 score: 0.19075647687597008
BLEU-3 score: 0.3555623733160047
BLEU-4 score: 0.41545589177443254

Real Caption: boy with an orange shirt lies on bodyboard in the surf

Predicted Caption: child in orange is on boogie board in the waves

2749

BLEU-1 score: 0.14285714285714285
BLEU-2 score: 0.3779644730092272
BLEU-3 score: 0.5577898253032461
BLEU-4 score: 0.6147881529512643

Real Caption: young man stands at the foot of cart

Predicted Caption: man in blue shirt and jeans stands behind sign that says mark mom finley

4747

BLEU-1 score: 0.37151909989293497
BLEU-2 score: 0.3276490485424231
BLEU-3 score: 0.2983578836180589
BLEU-4 score: 0.3564026463354183

Real Caption: girl walks on sidewalk while talking on cellphone

Predicted Caption: woman walking while talking on the phone

Corpus BLEU
BLEU-1: 0.311975
BLEU-2: 0.176421
BLEU-3: 0.135667
BLEU-4: 0.062990

In [None]:
# Transformer parameters
num_layer = 2
d_model = 784
dff = 16384
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR26_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

6189

BLEU-1 score: 0
BLEU-2 score: 0
BLEU-3 score: 0
BLEU-4 score: 0

Real Caption: two dogs play with one another on the lawn

Predicted Caption: white dog and white and black and white dog are playing

653

BLEU-1 score: 0.2222222222222222
BLEU-2 score: 0.16666666666666669
BLEU-3 score: 0.34127875184653655
BLEU-4 score: 0.408248290463863

Real Caption: dog is biting twig

Predicted Caption: brown dog is outside with stick in its mouth

4255

BLEU-1 score: 0.3333333333333333
BLEU-2 score: 0.24618298195866542
BLEU-3 score: 0.21614978940479493
BLEU-4 score: 0.2790159393585827

Real Caption: girl in colorful clothing is jumping on bed with quilt

Predicted Caption: young girl in colorful skirt and blue shirt pounces onto her bed

2742

BLEU-1 score: 0.3031234611978214
BLEU-2 score: 0.29284508512177104
BLEU-3 score: 0.29142524726978003
BLEU-4 score: 0.260899397956404

Real Caption: two brown and white dogs fighting on grassy area in front of tree

Predicted Caption: two brown and white dogs playing outdoors

Corpus BLEU
BLEU-1: 0.310308
BLEU-2: 0.177096
BLEU-3: 0.137887
BLEU-4: 0.064120

In [None]:
# Transformer parameters
num_layer = 2
d_model = 1024
dff = 16384
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1 

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR27_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

3610

BLEU-1 score: 0.5714285714285714
BLEU-2 score: 0.3086066999241838
BLEU-3 score: 0.4939047583970314
BLEU-4 score: 0.5555238068023582

Real Caption: race dog is running for first

Predicted Caption: dog in yellow jersey is running race

3969

BLEU-1 score: 0.5454545454545454
BLEU-2 score: 0.7385489458759964
BLEU-3 score: 0.83373410944439
BLEU-4 score: 0.8593887047640296

Real Caption: two dogs play in the surf and one has red ball

Predicted Caption: two black dogs playing in surf with ball on the shore

2668

BLEU-1 score: 0.07692307692307693
BLEU-2 score: 0.2773500981126146
BLEU-3 score: 0.46325167075036716
BLEU-4 score: 0.5266403878479265

Real Caption: boy in martial uniform holding sword

Predicted Caption: two women watching people in white shirts one of those in the fans

276

BLEU-1 score: 0.22062422564614886
BLEU-2 score: 0.4412484512922977
BLEU-3 score: 0.5822308220964749
BLEU-4 score: 0.6240195441936915

Real Caption: skateboarder wearing green shirt and jumping in the air

Predicted Caption: skateboarder is performing stunt in front of crowd

Corpus BLEU
BLEU-1: 0.323432
BLEU-2: 0.181061
BLEU-3: 0.136034
BLEU-4: 0.057099


In [None]:
# Transformer parameters
num_layer = 2
d_model = 1024
dff = 16384
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.11

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR28_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

6915

BLEU-1 score: 0.3309363384692233
BLEU-2 score: 0.5404167777297231
BLEU-3 score: 0.6575405946631796
BLEU-4 score: 0.6905911470987942

Real Caption: four people with shaved heads and white robes praying

Predicted Caption: religious people wearing white does dance and waving

238

BLEU-1 score: 0.09306272250443651
BLEU-2 score: 0.24622082007735305
BLEU-3 score: 0.3633660780959587
BLEU-4 score: 0.4004970149398301

Real Caption: man sitting on bench with his bicycle leaning against railing

Predicted Caption: man in hat is posing for picture

7915

BLEU-1 score: 0.4043537731417556
BLEU-2 score: 0.31321108587083024
BLEU-3 score: 0.26916921090198714
BLEU-4 score: 0.3081980909598119

Real Caption: three people standing near door with silly string cans

Predicted Caption: three children play with silly string

4754

BLEU-1 score: 0.07142857142857141
BLEU-2 score: 0.26726124191242434
BLEU-3 score: 0.45306612226568754
BLEU-4 score: 0.5169731539571706

Real Caption: man looks across city street at night

Predicted Caption: street scene shows neon lights and group of people dining while another group walks

Corpus BLEU
BLEU-1: 0.314223
BLEU-2: 0.178685
BLEU-3: 0.139263
BLEU-4: 0.065459


In [None]:
# Transformer parameters
num_layer = 2
d_model = 1024
dff = 16384
num_heads = 8
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.09

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR29_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

2934

BLEU-1 score: 0.5
BLEU-2 score: 0.3779644730092272
BLEU-3 score: 0.32585561793662904
BLEU-4 score: 0.392814650900513

Real Caption: little girl climbing into wooden playhouse

Predicted Caption: little girl climbing the stairs to her playhouse

153

BLEU-1 score: 0.23884377019126307
BLEU-2 score: 0.41368954504257255
BLEU-3 score: 0.5153458656549998
BLEU-4 score: 0.5444460596606694

Real Caption: two dark haired girls looking down at something

Predicted Caption: two little girls one is smiling

5993

BLEU-1 score: 0.38940039153570244
BLEU-2 score: 0.294359027552757
BLEU-3 score: 0.25377661041726324
BLEU-4 score: 0.30592435772324006

Real Caption: man in football uniform is running with football during game

Predicted Caption: football player running with football under his arm

599

BLEU-1 score: 0.7430381997858699
BLEU-2 score: 0.5675047991270782
BLEU-3 score: 0.5107200324946619
BLEU-4 score: 0.39442436483275556

Real Caption: little boy clowns with colander on his head

Predicted Caption: boy with metal colander on his head

Corpus BLEU
BLEU-1: 0.312763
BLEU-2: 0.172044
BLEU-3: 0.132441
BLEU-4: 0.059431

In [None]:
# Transformer parameters
num_layer = 2
d_model = 1024
dff = 16384
num_heads = 4
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR30_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

4495

BLEU-1 score: 0.4294155960430205
BLEU-2 score: 0.568063438111202
BLEU-3 score: 0.6353374384520296
BLEU-4 score: 0.6533657281851002

Real Caption: boy catches fish in small stream as girl points

Predicted Caption: boy fish while girl in green bikini

4716

BLEU-1 score: 0.14715177646857694
BLEU-2 score: 0.23266738769033593
BLEU-3 score: 0.279462484345493
BLEU-4 score: 0.2925637512788283

Real Caption: man is carefully skiing down snow covered hill wearing backpack

Predicted Caption: person skis down snowy hill

915

BLEU-1 score: 0.21470779802151024
BLEU-2 score: 0.4016815092325757
BLEU-3 score: 0.5160543568774933
BLEU-4 score: 0.5494128986804837

Real Caption: girl swinging from tree on rope over crashing waves

Predicted Caption: girl in red top of rope swing

111

BLEU-1 score: 0.18181818181818182
BLEU-2 score: 0.4264014327112209
BLEU-3 score: 0.5996408252050452
BLEU-4 score: 0.6529942057256104

Real Caption: many children playing outside in the grass practicing punching

Predicted Caption: boy in black is gathering around the face by another man

Corpus BLEU
BLEU-1: 0.328844
BLEU-2: 0.189616
BLEU-3: 0.142031
BLEU-4: 0.067061


In [None]:
# Transformer parameters
num_layer = 2
d_model = 1024
dff = 16384
num_heads = 2
row_size = 8
col_size = 8
target_vocab_size = top_k + 1
dropout_rate = 0.1

# Optimizer parameters
beta1 = 0.9
beta2 = 0.98
eps = 1e-9
opt="Adam"
dec= None
lr = 'Custom'
init_acc = None

learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=beta1, beta_2=beta2,
                                    epsilon=eps)

optimizer = tf.keras.optimizers.Adam(learning_rate)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
transformer = Transformer(num_layer,d_model,num_heads,dff,row_size,col_size,target_vocab_size, max_pos_encoding=target_vocab_size,rate=dropout_rate)    

results_df = pd.DataFrame()
results_df =  run_model(results_df)
results_df.to_csv("/content/drive/MyDrive/w266/Adam_CustomLR31_32_epochs.csv", sep='\t', encoding='utf-8')

generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()
generate_sentence_bleu()

lst = [100]
for l in lst:
    evaluate_model2(l)

**Output from script above**

7368

BLEU-1 score: 0.32269606971871956
BLEU-2 score: 0.4268867747668464
BLEU-3 score: 0.4774416584373174
BLEU-4 score: 0.49098950880470227

Real Caption: woman is holding stick in the air while dog jumps up

Predicted Caption: woman with stick is playing with dog

6359

BLEU-1 score: 0.596559544542913
BLEU-2 score: 0.3653166213293175
BLEU-3 score: 0.2915884185345374
BLEU-4 score: 0.3515059938464902

Real Caption: dog running in park with rubber chicken in its mouth

Predicted Caption: white dog is running with toy in its mouth

6785

BLEU-1 score: 0.2222222222222222
BLEU-2 score: 0.4714045207910317
BLEU-3 score: 0.6368486695666434
BLEU-4 score: 0.6865890479690392

Real Caption: <unk> terrier leaps after ball

Predicted Caption: little white dog wearing leash jumping after red ball

1990

BLEU-1 score: 0.39770636302860873
BLEU-2 score: 0.2982797722714566
BLEU-3 score: 0.46288414438794134
BLEU-4 score: 0.5166357204442372

Real Caption: woman plays with long red ribbons in an empty square

Predicted Caption: two people work with long ribbons in chinese courtyard

Corpus BLEU
BLEU-1: 0.296656
BLEU-2: 0.167271
BLEU-3: 0.127351
BLEU-4: 0.056110