In [None]:
import os
import pickle
import numpy as np
from tqdm.notebook import tqdm
from tensorflow.keras.applications.xception import Xception, preprocess_input
#from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, add
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

# Folder on the mounted Google Drive that holds the Flickr8K image files
directory = '/content/drive/My Drive/myfolder/Final project/Flickr8K/Images'

In [None]:
# Load the pretrained Xception network (its weights are downloaded on first use)
model = Xception()

# Restructure the model: drop the final classification layer so the network
# outputs the activations of its second-to-last layer, which serve as the
# image feature vector
model = Model(inputs=model.inputs, outputs=model.layers[-2].output)

# Summarize the architecture. summary() prints the table itself and returns
# None, so it must not be wrapped in print() (that would emit a stray "None").
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels.h5
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 299, 299, 3)]        0         []                            
                                                                                                  
 block1_conv1 (Conv2D)       (None, 149, 149, 32)         864       ['input_1[0][0]']             
                                                                                                  
 block1_conv1_bn (BatchNorm  (None, 149, 149, 32)         128       ['block1_conv1[0][0]']        
 alization)                                                                                       
                                                      

In [None]:
# Extract a feature vector for every image stored in Google Drive
features = {}
directory = '/content/drive/My Drive/myfolder/Final project/Flickr8K/Images'  # Update directory path

# Only files with these extensions are treated as images. Anything else in the
# folder (sub-folders, hidden files, stray text files) would make load_img
# raise and abort the whole, very long, extraction run.
image_extensions = ('.jpg', '.jpeg', '.png')

# Sorted so the processing order is the same on every run
image_names = sorted(
    name for name in os.listdir(directory)
    if name.lower().endswith(image_extensions)
)

for img_name in tqdm(image_names):
    # Load the image from file, resized to the 299x299 input the model expects
    img_path = os.path.join(directory, img_name)
    image = load_img(img_path, target_size=(299, 299))
    # Convert image pixels to numpy array
    image = img_to_array(image)
    # Add a leading batch dimension of size 1
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    # Apply the Xception-specific input preprocessing
    image = preprocess_input(image)
    # Extract features
    feature = model.predict(image, verbose=0)
    # Image ID is the file name up to its first dot (the same rule is used
    # when the captions file is parsed, so the keys line up)
    image_id = img_name.split('.')[0]
    # Store feature
    features[image_id] = feature


  0%|          | 0/8107 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Location on Google Drive where the extracted image features are persisted
output_path = '/content/drive/My Drive/myfolder/Final project/Flickr8K/features.pkl'

# Serialize the whole features dictionary straight into that file
with open(output_path, 'wb') as features_file:
    pickle.dump(features, features_file)


In [None]:
# Location on Google Drive of the previously saved features pickle
input_path = '/content/drive/My Drive/myfolder/Final project/Flickr8K/features.pkl'

# Restore the features dictionary from that file.
# NOTE: unpickling can execute arbitrary code, so only load a file you created yourself.
with open(input_path, 'rb') as features_file:
    features = pickle.load(features_file)


In [None]:
# Path of the captions text file on Google Drive
captions_path = '/content/drive/My Drive/myfolder/Final project/Flickr8K/captions.txt'

# Read the captions. The encoding is given explicitly so the result does not
# depend on the platform's default locale.
with open(captions_path, 'r', encoding='utf-8') as f:
    # Discard the first line, which is expected to be the header row; the
    # default argument keeps an empty file from raising StopIteration
    next(f, None)
    captions_doc = f.read()


In [None]:
# Build a dictionary that maps each image ID to the list of its captions
mapping = {}
for line in tqdm(captions_doc.split('\n')):
    # Ignore empty or one-character lines
    if len(line) < 2:
        continue
    # The first comma-separated field is the image file name; everything
    # after it belongs to the caption
    image_file, *caption_parts = line.split(',')
    # Image ID is the file name up to its first dot
    image_id = image_file.split('.')[0]
    # Re-join the caption pieces with spaces (commas inside the caption are dropped)
    caption = " ".join(caption_parts)
    # Append to the image's caption list, creating the list on first use
    mapping.setdefault(image_id, []).append(caption)

In [None]:
# Number of distinct image IDs that have at least one caption
len(mapping)

In [None]:
def clean(mapping):
    """Normalize every caption in ``mapping`` in place.

    Each caption is lower-cased, stripped of every character that is not a
    letter or whitespace (digits, punctuation, ...), reduced to words longer
    than one character, and wrapped in the ``startseq`` / ``endseq`` markers.

    Args:
        mapping: dict whose values are lists of caption strings. The lists
            are modified in place; nothing is returned.

    Note:
        The markers are added unconditionally, so calling this twice on the
        same mapping wraps the captions twice.
    """
    # Local import so the function works regardless of the cell's imports
    import re

    for captions in mapping.values():
        for i, caption in enumerate(captions):
            # convert to lowercase
            caption = caption.lower()
            # Delete digits, special chars, etc. This must be a regex
            # substitution: str.replace() would look for the literal text
            # "[^A-Za-z]" and change nothing. Whitespace is kept so that
            # neighbouring words are not fused together.
            caption = re.sub(r'[^a-z\s]', '', caption)
            # collapse runs of whitespace into a single space
            caption = re.sub(r'\s+', ' ', caption)
            # drop one-character words and add start and end tags
            words = [word for word in caption.split() if len(word) > 1]
            captions[i] = 'startseq ' + " ".join(words) + ' endseq'

In [None]:
# Captions of one sample image before text preprocessing
mapping['1000268201_693b08cb0e']

In [None]:
# Preprocess the text. clean() rewrites the caption lists inside `mapping`
# in place and always adds the startseq/endseq tags, so running this cell a
# second time would wrap the captions again.
clean(mapping)

In [None]:
# Captions of the same sample image after text preprocessing
mapping['1000268201_693b08cb0e']

In [None]:
# Flatten the per-image caption lists into one list holding every caption
all_captions = [caption for key in mapping for caption in mapping[key]]

In [None]:
# Total number of captions across all images
len(all_captions)

In [None]:
# Peek at the first ten captions
all_captions[:10]

In [None]:
# Fit a tokenizer on every caption to build the word -> index vocabulary
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_captions)
# One more than the number of known words (presumably to leave room for
# index 0, which the model's embedding treats as padding)
vocab_size = 1 + len(tokenizer.word_index)

In [None]:
# Vocabulary size: number of words known to the tokenizer plus one
vocab_size

In [None]:
# Length, in whitespace-separated words, of the longest caption
max_length = max(map(lambda caption: len(caption.split()), all_captions))
max_length

In [None]:
## Train / test split: the first 90% of the image IDs train, the rest test
image_ids = [*mapping]
split = int(0.90 * len(image_ids))
train, test = image_ids[:split], image_ids[split:]

In [None]:
# Endless batch generator feeding (image feature, caption prefix) -> next word
def data_generator(data_keys, mapping, features, tokenizer, max_length, vocab_size, batch_size):
    """Yield training batches forever.

    For every caption of every image, each proper prefix of the encoded
    caption becomes one sample: the image feature and the padded prefix are
    the inputs, the one-hot encoded following word is the target.

    Args:
        data_keys: image IDs to iterate over (re-iterated endlessly).
        mapping: dict from image ID to its list of caption strings.
        features: dict from image ID to a feature array; element ``[0]`` of
            that array is used as the image input.
        tokenizer: object whose ``texts_to_sequences`` encodes captions.
        max_length: length every input sequence is padded to.
        vocab_size: number of classes for the one-hot targets.
        batch_size: number of *images* (not samples) per yielded batch.

    Yields:
        ``([image_inputs, sequence_inputs], targets)`` as numpy arrays.
    """
    image_inputs, sequence_inputs, targets = [], [], []
    images_seen = 0
    while True:
        for image_key in data_keys:
            images_seen += 1
            for text in mapping[image_key]:
                # Encode the caption as a list of word indices
                token_ids = tokenizer.texts_to_sequences([text])[0]
                # Every cut point produces one (prefix -> next word) sample
                for cut in range(1, len(token_ids)):
                    prefix = pad_sequences([token_ids[:cut]], maxlen=max_length)[0]
                    next_word = to_categorical([token_ids[cut]], num_classes=vocab_size)[0]

                    image_inputs.append(features[image_key][0])
                    sequence_inputs.append(prefix)
                    targets.append(next_word)
            # Keep accumulating until a full batch of images has been processed
            if images_seen != batch_size:
                continue
            yield [np.array(image_inputs), np.array(sequence_inputs)], np.array(targets)
            image_inputs, sequence_inputs, targets = [], [], []
            images_seen = 0

In [None]:
# encoder model
# image feature layers: regularize the 2048-element image feature vector and
# project it down to 256 units
inputs1 = Input(shape=(2048,))
fe1 = Dropout(0.4)(inputs1)
fe2 = Dense(256, activation='relu')(fe1)
# sequence feature layers: embed the padded word-index sequence (index 0 is
# masked) and summarize it with an LSTM into 256 units
inputs2 = Input(shape=(max_length,))
se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
se2 = Dropout(0.4)(se1)
se3 = LSTM(256)(se2)

# decoder model: merge both 256-unit branches by element-wise addition and
# predict a probability distribution over the vocabulary for the next word
decoder1 = add([fe2, se3])
decoder2 = Dense(256, activation='relu')(decoder1)
outputs = Dense(vocab_size, activation='softmax')(decoder2)

# Note: this rebinds `model`, which earlier held the Xception feature extractor
model = Model(inputs=[inputs1, inputs2], outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam')

# plot the model
plot_model(model, show_shapes=True)

In [None]:
# Training configuration
epochs = 70
batch_size = 32
# Batches drawn from the generator per epoch (batch_size counts images)
steps = len(train) // batch_size

# Train one epoch at a time, starting a fresh data generator for each epoch
for epoch_index in range(epochs):
    generator = data_generator(train, mapping, features, tokenizer, max_length, vocab_size, batch_size)
    model.fit(generator, epochs=1, steps_per_epoch=steps, verbose=1)


In [None]:
# Destination on Google Drive for the trained captioning model
model_path = '/content/drive/My Drive/myfolder/Final project/Flickr8K/best_model.h5'

# Save the model currently bound to `model` to that path
model.save(model_path)

In [None]:
from tensorflow.keras.models import load_model

# Reload the saved model from model_path, replacing the in-memory `model`
model = load_model(model_path)

# The reloaded `model` can now be used for prediction or any other purpose


In [None]:
def idx_to_word(integer, tokenizer):
    """Return the word whose vocabulary index is ``integer``.

    Args:
        integer: word index to look up (a Python or numpy integer).
        tokenizer: fitted tokenizer exposing ``word_index`` (word -> index)
            and, when available, ``index_word`` (index -> word).

    Returns:
        The matching word, or ``None`` when no word has that index.
    """
    # Fast path: use the tokenizer's reverse map for a constant-time lookup
    # instead of scanning the whole vocabulary for every generated word
    index_word = getattr(tokenizer, 'index_word', None)
    if index_word:
        return index_word.get(integer)
    # Fallback for tokenizer-like objects that only provide word_index
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None

In [None]:
# Greedy, word-by-word caption generation for a single image
def predict_caption(model, image, tokenizer, max_length):
    """Generate a caption by repeatedly predicting the most likely next word.

    Args:
        model: object whose ``predict([image, sequence], verbose=0)`` returns
            scores over the vocabulary.
        image: image feature input passed to the model unchanged.
        tokenizer: tokenizer used to encode the text generated so far and to
            map the predicted index back to a word.
        max_length: padding length, and the maximum number of words generated.

    Returns:
        The generated text, beginning with ``startseq`` and ending with
        ``endseq`` if the model produced it before the length limit.
    """
    caption = 'startseq'
    words_generated = 0
    while words_generated < max_length:
        words_generated += 1
        # Encode and pad everything generated so far
        token_ids = tokenizer.texts_to_sequences([caption])[0]
        padded = pad_sequences([token_ids], max_length)
        # Pick the highest-scoring vocabulary index
        scores = model.predict([image, padded], verbose=0)
        best_index = np.argmax(scores)
        next_word = idx_to_word(best_index, tokenizer)
        # An index with no word ends generation without appending anything
        if next_word is None:
            return caption
        caption += " " + next_word
        # The end tag is appended and then terminates generation
        if next_word == 'endseq':
            return caption

    return caption

In [None]:
from nltk.translate.bleu_score import corpus_bleu
# Evaluate on the held-out images: gather reference and predicted word lists
actual, predicted = [], []

for key in tqdm(test):
    # All reference captions of this image, each split into words
    references = [caption.split() for caption in mapping[key]]
    # Caption generated by the model, split into words
    hypothesis = predict_caption(model, features[key], tokenizer, max_length).split()
    actual.append(references)
    predicted.append(hypothesis)

# Calculate corpus-level BLEU scores with unigram-only and unigram+bigram weights
bleu_1 = corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0))
print("BLEU-1: %f" % bleu_1)
bleu_2 = corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0))
print("BLEU-2: %f" % bleu_2)


In [None]:
import tkinter as tk
from tkinter import *
from tkinter import filedialog
from PIL import Image, ImageTk
import time



def upload_image():
    """Ask the user to pick an image file, then record and display it.

    The file's base name is stored in ``con1`` and the picture is shown via
    ``display_image``. Nothing happens if the dialog is cancelled.
    """
    chosen_path = filedialog.askopenfilename(
        initialdir="/home/sw900b2_arjun/final_project/Images/",
        title="Select Image",
        filetypes=(("Image files", "*.jpg *.jpeg *.png *.gif"), ("All files", "*.*")),
    )
    # An empty result means the dialog was dismissed without a selection
    if not chosen_path:
        return
    # Keep only the part after the last "/" as the image name
    con1.set(chosen_path.rsplit("/", 1)[-1])
    display_image(chosen_path)

def display_image(filename):
    """Show the image stored at ``filename`` in ``image_label``.

    The picture is shrunk in place to fit within 500x500 pixels before it is
    handed to the label.
    """
    picture = Image.open(filename)
    picture.thumbnail((500, 500))
    tk_photo = ImageTk.PhotoImage(picture)
    image_label.config(image=tk_photo)
    # Also store the photo on the widget so a reference to it stays alive
    # after this function returns
    image_label.image = tk_photo

from PIL import Image
import matplotlib.pyplot as plt
def generate_caption():
    """Generate a caption for the image named in ``con1`` and show it.

    The image ID (the name up to its first dot) selects the pre-extracted
    feature vector from ``features``; the caption predicted from it is stored
    in ``con2`` and typed out character by character in a label at grid row 7.
    If no features exist for the image, a short message is shown instead.
    """
    image_name = con1.get()
    image_id = image_name.split('.')[0]

    # Reuse one output label across clicks; creating a new label every time
    # would stack widgets on top of each other in the same grid cell
    label = getattr(generate_caption, "_caption_label", None)
    if label is None:
        label = tk.Label(root, text="")
        label.grid(row=7, column=0, pady=10)
        generate_caption._caption_label = label
    else:
        label.config(text="")

    # Captions are predicted from pre-extracted features only, so the image
    # file itself does not need to be opened here
    if image_id not in features:
        label.config(text="No extracted features found for image: " + image_name)
        return

    y_pred = predict_caption(model, features[image_id], tokenizer, max_length)
    con2.set(y_pred)

    def display_text():
        # Typewriter effect: append one character, redraw, pause briefly
        for ch in y_pred:
            label.config(text=label.cget("text") + ch)
            label.update()
            time.sleep(0.3)

    display_text()

# Main tkinter window
root = tk.Tk()
root.title("Image Caption Generator")

# con1 holds the selected image's file name; con2 holds the generated caption
con1 = StringVar()
con2 = StringVar()
# Entry showing the image name (row 4) with its "Image_Name" heading (row 3)
v1 = Entry(root, textvariable=con1)
v2=tk.Label(root,text="Image_Name")
v1.grid(row=4, column=0, pady=10)
v2.grid(row=3,column=0)
# The bare string literal below is disabled code for a caption entry field;
# as a standalone string expression it creates no widgets
"""
#caption
c1 = Entry(root, textvariable=con2)
c2=tk.Label(root,text="Caption")
c1.grid(row=1, column=2, pady=10)
c2.grid(row=1,column=1)

"""

# Upload button (row 0): opens the file chooser via upload_image
upload_button = tk.Button(root, text="Upload Image", command=upload_image)
upload_button.grid(row=0, column=0, pady=10)

# Image display (row 1): filled in by display_image
image_label = tk.Label(root)
image_label.grid(row=1, column=0)



# "Caption Generator" button (row 6): despite the variable name it does not
# save anything; it runs generate_caption
save_button = tk.Button(root, text="Caption Generator", command=lambda : generate_caption())
save_button.grid(row=6, column=0, pady=5)

# Hand control to the Tk event loop
root.mainloop()