In [None]:
import pandas as pd
import numpy as np
import os
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from keras import Input, layers

In [None]:
mypath= './data/txt_data'
infile = os.path.join(mypath, 'Flickr8k.token.txt')
print(infile)

# Tab-separated copy of the token file; a later cell reads it with pandas.
outfile = os.path.join(mypath, 'Flickr8kmod.token.txt')


def write_tab_separated_tokens(src, dst):
    """Copy ``src`` to ``dst`` with the first '#' of every line turned into a tab.

    Only the first '#' per line is replaced, so a '#' that appears later in
    the line is left untouched.

    Args:
        src: path of the text file to read.
        dst: path of the text file to (over)write.
    """
    # Both handles are closed on exit, even if a read or write fails.
    with open(src) as fin, open(dst, 'w') as fout:
        for line in fin:
            fout.write(line.replace('#', '\t', 1))


# Build the derived file only when the source exists and the copy is missing,
# so re-running this cell is cheap and never clobbers an existing file.
if os.path.exists(infile) and not os.path.exists(outfile):
    write_tab_separated_tokens(infile, outfile)

In [None]:
myfile = os.path.join(mypath, 'Flickr8kmod.token.txt')
# Pass the separator by keyword: pandas 2.x accepts only the path positionally,
# so read_csv(myfile, '\t', ...) raises TypeError there.
df_raw = pd.read_csv(myfile, sep='\t', header=None)
# The file has no header row; name the three tab-separated fields explicitly.
df_raw.columns = ['image_id', 'label', 'caption']

In [None]:
def read_doc(filename):
    """Return the whole content of the text file ``filename`` as one string.

    Args:
        filename: path of the file to read (opened in text mode).

    Returns:
        The file content as a single ``str``.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, 'r') as file:
        return file.read()

def load_descriptions(doc):
    """Group captions by image id.

    Every line of ``doc`` is split on whitespace. The first token, cut at its
    first '.', is the image id; the remaining tokens joined by single spaces
    form the caption. Lines with fewer than two tokens are ignored.

    Args:
        doc: full text of the token file.

    Returns:
        dict mapping image id -> list of caption strings, in file order.
    """
    captions_by_image = {}
    for raw_line in doc.split('\n'):
        parts = raw_line.split()
        if len(parts) >= 2:
            name, *words = parts
            stem = name.split('.')[0]
            captions_by_image.setdefault(stem, []).append(' '.join(words))
    return captions_by_image

# Read the raw token file and group its captions by image id.
doc = read_doc(infile)
descriptions = load_descriptions(doc)
# Number of distinct image ids, followed by a five-item sample of the mapping.
print('Loaded: %d ' % len(descriptions))
print(list(descriptions.items())[:5])
# Last expression of the cell, so it is displayed as the cell output.
# Raises KeyError if this image id is not present in the loaded file.
descriptions['1000268201_693b08cb0e'][0]



In [None]:
import re

def clean_descriptions(descriptions):
    """Normalise every caption to lower-case alphabetic words.

    Each caption is lower-cased, every run of characters outside a-z is
    replaced by a space, and words of length one are dropped. Image ids whose
    caption list is empty do not appear in the result.

    Args:
        descriptions: dict mapping image id -> list of caption strings.

    Returns:
        A new dict mapping image id -> list of cleaned caption strings.
    """
    non_letters = re.compile("[^a-z]+")
    cleaned = {}
    for image_id, captions in descriptions.items():
        for caption in captions:
            words = non_letters.sub(" ", caption.lower()).split()
            kept = ' '.join(word for word in words if len(word) > 1)
            cleaned.setdefault(image_id, []).append(kept)
    return cleaned
    
            

# Clean every caption; the raw `descriptions` dict itself is left unchanged.
clean_desc = clean_descriptions(descriptions)
print('***********')
# Last expression of the cell: displays the cleaned captions of one sample image.
# Raises KeyError if this image id is not present.
clean_desc['1000268201_693b08cb0e']

In [None]:
all_words = set()

# Collect every distinct whitespace-separated word across all cleaned captions.
for captions in clean_desc.values():
    for caption in captions:
        all_words.update(caption.split())
print((all_words))


In [None]:
def save_descriptions(description, filename):
    """Write all captions to ``filename``, one ``"<key> <caption>"`` per line.

    Lines are joined with '\\n' and no trailing newline is written.

    Args:
        description: dict mapping image id -> list of caption strings.
        filename: path of the text file to (over)write.
    """
    lines = [
        key + " " + desc
        for key, desc_list in description.items()
        for desc in desc_list
    ]
    # The context manager flushes and closes the file even if write() raises.
    with open(filename, 'w') as file:
        file.write('\n'.join(lines))

In [None]:
# Persist the cleaned captions as "<image_id> <caption>" lines in clean_descriptions.txt under `mypath`.
save_descriptions(clean_desc, os.path.join(mypath,'clean_descriptions.txt'))

In [None]:
def load_set(filename):
    """Load the identifiers listed in ``filename``, one per line.

    An identifier is the part of a line before its first '.'.
    Empty lines are skipped.

    Args:
        filename: path of the text file to read.

    Returns:
        A set of identifier strings.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, 'r') as file:
        text = file.read()
    return {line.split('.')[0] for line in text.split('\n') if len(line) >= 1}

# Text file listing the training image names, one per line.
inf = os.path.join(mypath, 'Flickr_8k.trainImages.txt')

# Set of training image ids (the part of each listed name before its first '.').
train = load_set(inf)
print(len(train))
# A set has no defined order, so this five-item sample is arbitrary.
print(list(train)[:5])

In [None]:
import glob
# Directory holding the image files; the trailing '/' matters because the
# glob pattern below is built by plain string concatenation.
img_path = './data/flicker_dataset/Flicker8k_Dataset/'
print(img_path)
# Paths of all *.jpg files directly inside img_path.
img = glob.glob(img_path+'*.jpg')
# Last expression of the cell: displays the first five paths.
img[:5]

In [None]:
train_img_path = './data/txt_data/Flickr_8k.trainImages.txt'
# Read the training file names inside a context manager so the handle is closed.
with open(train_img_path, 'r') as train_file:
    train_img = set(train_file.read().strip().split('\n'))
# Show the size and a small sample instead of dumping the whole set.
print(len(train_img), sorted(train_img)[:5])

# Keep the image paths whose file name is listed in the training split.
# basename() does not depend on the exact length/spelling of img_path.
train_images = [path for path in img if os.path.basename(path) in train_img]

In [None]:
from pickle import dump, load

In [None]:
def load_clean_description(filename, dataset):
    """Load the captions of the images in ``dataset`` and wrap them in markers.

    Each non-blank line of ``filename`` is ``"<image_id> <caption words...>"``.
    Captions of ids contained in ``dataset`` are returned as
    ``'startseq <caption> endseq'``; all other lines are ignored.

    Args:
        filename: path of the cleaned-descriptions text file.
        dataset: container of image ids to keep (membership is tested with ``in``).

    Returns:
        dict mapping image id -> list of wrapped caption strings.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, 'r') as file:
        text = file.read()
    descriptions = dict()
    for line in text.split('\n'):
        tokens = line.split()
        # A blank line (e.g. after a trailing newline) has no tokens;
        # indexing tokens[0] there would raise IndexError.
        if not tokens:
            continue
        image_id, image_desc = tokens[0], tokens[1:]
        if image_id in dataset:
            desc = 'startseq '+' '.join(image_desc) + ' endseq'
            descriptions.setdefault(image_id, []).append(desc)
    return descriptions
# Path of the cleaned-captions file under `mypath`.
cleaned_desc = os.path.join(mypath,'clean_descriptions.txt')
# Captions of the training images only, each wrapped in 'startseq ... endseq'.
train_description = load_clean_description(cleaned_desc, train)
# Last expression of the cell: number of training images that have captions.
len(train_description)

In [None]:
def dict_to_list(descriptions):
    """Flatten ``{key: [caption, ...]}`` into one list of captions.

    Captions keep the dict's iteration order and, within a key, list order.

    Args:
        descriptions: dict mapping a key -> list of caption strings.

    Returns:
        A new list containing every caption.
    """
    # Build the result directly; the previous comprehension existed only for
    # its append() side effect and produced a throw-away list of None.
    return [desc for desc_list in descriptions.values() for desc in desc_list]
# Create a Keras Tokenizer and fit it on the captions.
# Fitting vectorises the text corpus: every distinct word
# is assigned an integer index in the tokenizer's word_index dictionary.
from keras.preprocessing.text import Tokenizer
def create_tokenizer(descriptions):
    """Return a ``Tokenizer`` fitted on every caption in ``descriptions``.

    Args:
        descriptions: dict mapping image id -> list of caption strings.

    Returns:
        The fitted ``Tokenizer`` instance.
    """
    fitted = Tokenizer()
    fitted.fit_on_texts(dict_to_list(descriptions))
    return fitted
# Give each word an index and store the fitted tokenizer in the tokenizer.p pickle file.
tokenizer = create_tokenizer(train_description)
# Write through a context manager so the pickle is flushed and the handle closed.
with open('tokenizer.p', 'wb') as tokenizer_file:
    dump(tokenizer, tokenizer_file)
# +1 because no word is assigned index 0 here; the model treats 0 as padding (mask_zero=True).
vocab_size = len(tokenizer.word_index) + 1
vocab_size


In [None]:
# train our model
# NOTE(review): this cell reads `train_features`, `max_length` and
# `define_model`, all of which are defined in cells further down the notebook.
# On a fresh kernel (Restart & Run All) it raises NameError; it only works
# after those later cells have been executed.
print('Dataset: ', len(train_img))
print('Descriptions: train=', len(train_description))
print('Photos: train=', len(train_features))
print('Vocabulary Size:', vocab_size)
print('Description Length: ', max_length)
# Builds and compiles the captioning model; `model` is used by the training cells.
model = define_model(vocab_size, max_length)


In [None]:
#calculate maximum length of descriptions
def max_description_length(descriptions):
    """Return the largest number of whitespace-separated words in any caption.

    Args:
        descriptions: dict mapping image id -> list of caption strings.

    Returns:
        The word count of the longest caption.

    Raises:
        ValueError: if ``descriptions`` contains no caption at all.
    """
    return max(
        len(desc.split())
        for desc_list in descriptions.values()
        for desc in desc_list
    )

# Measure the captions that are actually turned into training sequences
# (cleaned and wrapped in startseq/endseq), not the raw token-file captions.
# Otherwise the padding length is unrelated to the training data and sequences
# longer than it are truncated by pad_sequences.
# The helper has its own name so this int no longer shadows the function.
max_length = max_description_length(train_description)
max_length

In [None]:
# NOTE: unpickling can execute arbitrary code; only load a features.p that
# you produced yourself.
with open("features.p", "rb") as features_file:
    features = load(features_file)

# `train_img` (the set of training image file names) is built by an earlier
# cell from Flickr_8k.trainImages.txt; it is reused here rather than being
# re-read through a second, never-closed file handle.
# Raises KeyError if a training image has no entry in `features`.
train_features = {k:features[k] for k in train_img}

# Feature keys are file names, description keys are bare image ids.
print(list(train_features)[:3])
print(list(train_description)[:3])

In [None]:
def data_generator(descriptions, features, tokenizer, max_length, batch_size):
    """Yield ``([image_features, input_sequences], next_words)`` batches forever.

    One batch holds the training samples of ``batch_size`` images, stacked
    along the first axis. The generator loops over ``descriptions`` again and
    again, as ``model.fit(..., steps_per_epoch=...)`` needs when it runs more
    than one epoch. The last batch of each pass may hold fewer images.

    Args:
        descriptions: dict mapping image id -> list of captions.
        features: mapping from ``'<image id>.jpg'`` to a sequence whose first
            element is that image's feature vector.
        tokenizer: fitted tokenizer, forwarded to ``create_sequences``.
        max_length: padded input-sequence length, forwarded to ``create_sequences``.
        batch_size: number of images per yielded batch (must be >= 1).

    Yields:
        ``([X_image, X_sequence], y)`` of NumPy arrays.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
        KeyError: if an image has no entry in ``features``.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    def _stack(parts):
        # Join the per-image sample arrays along the sample axis.
        return np.concatenate(parts)

    while True:
        images, sequences, words = [], [], []
        produced = False
        for key, description_list in descriptions.items():
            # Features are keyed by file name, descriptions by bare image id.
            feature = features[key + '.jpg'][0]
            input_image, input_sequence, output_word = create_sequences(
                tokenizer, max_length, description_list, feature)
            # An image without any training sample would yield empty arrays
            # whose shape cannot be concatenated with the others.
            if len(output_word) == 0:
                continue
            # Keep every image of the batch; previously only the data of the
            # batch_size-th image was yielded and the rest was thrown away.
            images.append(input_image)
            sequences.append(input_sequence)
            words.append(output_word)
            if len(images) == batch_size:
                yield ([_stack(images), _stack(sequences)], _stack(words))
                produced = True
                images, sequences, words = [], [], []
        if images:
            # Flush the incomplete final batch of this pass.
            yield ([_stack(images), _stack(sequences)], _stack(words))
            produced = True
        if not produced:
            # Nothing to train on: stop instead of spinning forever.
            return
            
def create_sequences(tokenizer, max_length, desc_list, feature, vocab_size=None):
    """Expand the captions of one image into (image, prefix, next-word) samples.

    Every caption is encoded to integers; for each position ``i >= 1`` one
    sample is produced: the prefix ``seq[:i]`` padded to ``max_length`` as
    input, and ``seq[i]`` one-hot encoded as target.

    Args:
        tokenizer: fitted tokenizer providing ``texts_to_sequences`` and ``word_index``.
        max_length: length every input prefix is padded to.
        desc_list: captions of a single image.
        feature: feature vector of that image, repeated for every sample.
        vocab_size: number of classes of the one-hot target. Defaults to
            ``len(tokenizer.word_index) + 1``, so the function no longer
            depends on a module-level ``vocab_size`` variable.

    Returns:
        Tuple ``(X1, X2, y)`` of NumPy arrays: image features, padded input
        sequences and one-hot next words, one row per sample.
    """
    if vocab_size is None:
        vocab_size = len(tokenizer.word_index) + 1
    X1, X2, y = list(), list(), list()
    # walk through each description for the image
    for desc in desc_list:
        # encode the sequence
        seq = tokenizer.texts_to_sequences([desc])[0]
        # split one sequence into multiple X,y pairs
        for i in range(1, len(seq)):
            # pad the input prefix, one-hot encode the word that follows it
            in_seq = pad_sequences([seq[:i]], maxlen=max_length)[0]
            out_seq = to_categorical([seq[i]], num_classes=vocab_size)[0]
            # store
            X1.append(feature)
            X2.append(in_seq)
            y.append(out_seq)

    return np.array(X1), np.array(X2), np.array(y)
#You can check the shape of the input and output for your model
# Pull one batch (batch_size=3) from a fresh generator and unpack it into
# image features `a`, input sequences `b` and target words `c`.
[a,b],c = next(data_generator(train_description, train_features, tokenizer, max_length, 3))
# Last expression of the cell: displays the three array shapes.
a.shape, b.shape, c.shape
# Example shapes noted by the author (presumably from an earlier run; may be stale):
#((47, 2048), (47, 32), (47, 7577))

In [None]:
# Scratch inspection cell. Note that it rebinds `a`, which the shape-check
# cell above uses for the image-feature batch.
a = train_description.values()
# Disabled experiment; a dict values view cannot be indexed with [1].
#seq = tokenizer.texts_to_sequences([train_description.values()[1]])[0]
# Last expression of the cell: displays the type of the values view.
type(a)

In [None]:
import tensorflow.keras
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, TimeDistributed, Dense, RepeatVector,\
                         Activation, Flatten, Reshape, concatenate, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam, RMSprop
from keras.layers.wrappers import Bidirectional
from keras.layers.merge import add
from tensorflow.keras.models import Model
from tensorflow.keras import Input, layers
from tensorflow.keras import optimizers
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
#from keras.utils import plot_model
from keras.utils.vis_utils import plot_model

In [None]:

# define the captioning model
def define_model(vocab_size, max_length, feature_dim=1000):
    """Build and compile the merge-style captioning model.

    The image feature vector and the partial caption are each reduced to a
    256-wide representation, added together, and decoded into a softmax over
    the vocabulary (the next word).

    Args:
        vocab_size: number of output classes and size of the embedding table.
        max_length: length of the padded input word sequence.
        feature_dim: width of the image feature vector. Defaults to 1000, the
            value that used to be hard-coded here.
            NOTE(review): confirm this matches the vectors stored in features.p.

    Returns:
        The compiled Keras ``Model`` taking ``[image_features, sequence]``.
    """
    # image features squeezed from feature_dim to 256 nodes
    inputs1 = Input(shape=(feature_dim,))
    fe1 = Dropout(0.5)(inputs1)
    fe2 = Dense(256, activation='relu')(fe1)
    # LSTM sequence model; mask_zero=True makes the LSTM skip the 0 padding
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
    se2 = Dropout(0.5)(se1)
    se3 = LSTM(256)(se2)
    # Merging both models
    decoder1 = add([fe2, se3])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)
    # tie it together [image, seq] [word]
    model = Model(inputs=(inputs1, inputs2), outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()
    #plot_model(model, to_file='model.png', show_shapes=True)
    return model

In [None]:

epochs = 10
batch_size = 6
#model.optimizer.lr = 0.0001
# Same quantity as batch_size; kept under this name because other cells use it.
number_pics_per_batch = batch_size

# Number of full batches in one pass over the training images.
steps = len(train_description)//number_pics_per_batch
# Make sure the checkpoint directory exists before the first save.
os.makedirs("models", exist_ok=True)
for i in range(epochs):
    # A fresh generator per epoch guarantees `steps` batches are available,
    # whether or not data_generator loops forever.
    generator = data_generator(train_description, train_features, tokenizer, max_length, batch_size)
    # One epoch per iteration: the loop itself counts the epochs and saves a
    # checkpoint after each (epochs=6 here trained 6 x 10 epochs in total).
    model.fit(generator, epochs=1, steps_per_epoch=steps, verbose=1)
    model.save("models/model_" + str(i) + ".h5")

In [None]:
# Lower the learning rate for the second training round. `learning_rate` is
# the attribute name Keras optimizers document; `lr` is only a legacy alias.
model.optimizer.learning_rate = 0.0001
epochs = 10
number_pics_per_batch = 6
# Number of full batches in one pass over the training images.
steps = len(train_description)//number_pics_per_batch
steps

In [None]:
# Make sure the checkpoint directory exists before the first save.
os.makedirs('./model_weights', exist_ok=True)
for i in range(epochs):
    # data_generator requires batch_size; omitting it raised TypeError.
    generator = data_generator(train_description, train_features, tokenizer, max_length, number_pics_per_batch)
    # One epoch per iteration: the loop counts the epochs and saves after each.
    model.fit(generator, epochs=1, steps_per_epoch=steps, verbose=1)
    model.save('./model_weights/model_' + str(i) + '.h5')