In [None]:
import string
import numpy as np
from PIL import Image
import os
from pickle import dump, load
import numpy as np
from keras.applications.xception import Xception, preprocess_input
from keras.preprocessing.image import load_img, img_to_array
from keras_hub.tokenizers import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers import add
from keras.models import Model, load_model
from keras.layers import Input, Dense, LSTM, Embedding, Dropout
# small library for seeing the progress of loops.
from tqdm import tqdm_notebook as tqdm
#tqdm().pandas()

In [None]:
# --- Robust Dataset setup (auto-detect common Flickr8k filenames) ---
# NOTE: this cell previously contained literal "\n" escape sequences instead of real
# line breaks, which turned the entire cell into one long comment that never executed.
# It is restored here as real code; the regexes use single backslashes (`\.`, `\-`).
from google.colab import drive
import os, re, fnmatch, textwrap

drive.mount('/content/drive', force_remount=False)

# Try to auto-find the Flickr8k text folder under My Drive if user didn't edit this
base = '/content/drive/My Drive'
candidate_dirs = []
try:
    for name in os.listdir(base):
        if re.search(r'flickr', name, re.I) or re.search(r'flickr8k', name, re.I) or re.search(r'8k', name):
            candidate_dirs.append(os.path.join(base, name))
except FileNotFoundError:
    # Drive is not mounted (or the folder is missing): fall through to the default below.
    candidate_dirs = []

# If we found candidate folders, prefer the one containing the expected files
dataset_text = None
expected_keywords = ['Flickr_8k.trainImages', 'Flickr_8k.devImages', 'Flickr_8k.testImages', 'ExpertAnnotations', 'Flickr8k']
for d in candidate_dirs:
    try:
        files = os.listdir(d)
    except OSError:
        # Not a directory / not readable: simply not a candidate.
        continue
    joined = ' '.join(files)
    if any(k in joined for k in expected_keywords):
        dataset_text = d
        break

# Fallback: common folder name
if dataset_text is None:
    dataset_text = os.path.join(base, 'Flickr8k_text')

print("\nUsing dataset_text =", dataset_text)
print("Files in dataset folder (if mounted correctly):")
try:
    for f in sorted(os.listdir(dataset_text)):
        print(" -", f)
except OSError as e:
    print("  (could not list folder contents: {})".format(e))
    print("\nIf the path above is incorrect, edit the 'dataset_text' variable to point to your Drive folder.")


def pick_file(patterns):
    """Return the path of the first file in ``dataset_text`` matching any pattern.

    Args:
        patterns: regular-expression strings, tried in order (case-insensitive
            ``search``), so more specific patterns should come first.

    Returns:
        The joined path of the first matching file, or ``None`` when the folder
        cannot be listed or nothing matches.
    """
    try:
        # Sorted so the choice is deterministic when several files match.
        files = sorted(os.listdir(dataset_text))
    except OSError:
        return None
    # macOS writes "._<name>" AppleDouble sidecar files next to the real ones;
    # they hold metadata, not dataset content, so never pick them.
    files = [f for f in files if not f.startswith('._')]
    for pat in patterns:
        prog = re.compile(pat, re.I)
        for f in files:
            if prog.search(f):
                return os.path.join(dataset_text, f)
    return None


train_list_file = pick_file([r'Flickr[_\- ]?8k\.trainImages', r'trainImages', r'train'])
dev_list_file   = pick_file([r'Flickr[_\- ]?8k\.devImages', r'devImages', r'dev'])
test_list_file  = pick_file([r'Flickr[_\- ]?8k\.testImages', r'testImages', r'test'])
descriptions_file = pick_file([r'ExpertAnnotations', r'descriptions', r'captions', r'annotations'])
token_file = pick_file([r'lemma.*token', r'Flickr8k.*token', r'_Flickr8k.*token', r'token'])

features_file = None
features_file = pick_file([r'features\.p', r'features.*\.p'])
if features_file is None:
    # Also look for features.p directly under My Drive.
    fd = os.path.join(base, 'features.p')
    if os.path.exists(fd):
        features_file = fd

print("\nAuto-detected file paths:")
print(" train_list_file   ->", train_list_file)
print(" dev_list_file     ->", dev_list_file)
print(" test_list_file    ->", test_list_file)
print(" descriptions_file ->", descriptions_file)
print(" token_file        ->", token_file)
print(" features_file     ->", features_file)

missing = [name for name, val in [
    ('train_list_file', train_list_file),
    ('dev_list_file', dev_list_file),
    ('test_list_file', test_list_file),
    ('descriptions_file', descriptions_file),
] if val is None]

if missing:
    print("\nWARNING: Some expected files were NOT found automatically:", missing)
    print(" -> If files are missing, either (a) move them into the folder listed above, or (b) edit the variables below to point to the exact filenames.")
else:
    print("\nAll essential files were detected. You can still override the variables below if you want.")

# Anything that was not detected falls back to the conventional Flickr8k filename.
train_list_file = train_list_file or os.path.join(dataset_text, 'Flickr_8k.trainImages.txt')
dev_list_file   = dev_list_file   or os.path.join(dataset_text, 'Flickr_8k.devImages.txt')
test_list_file  = test_list_file  or os.path.join(dataset_text, 'Flickr_8k.testImages.txt')
descriptions_file = descriptions_file or os.path.join(dataset_text, 'ExpertAnnotations.txt')
token_file = token_file or os.path.join(dataset_text, '_Flickr8k.lemma.token.txt')
features_file = features_file or os.path.join(dataset_text, 'features.p')

print("\nFinal paths to be used (you may edit these variables if needed):")
print(" dataset_text       =", dataset_text)
print(" train_list_file    =", train_list_file)
print(" dev_list_file      =", dev_list_file)
print(" test_list_file     =", test_list_file)
print(" descriptions_file  =", descriptions_file)
print(" token_file         =", token_file)
print(" features_file      =", features_file)

In [None]:

# --- USER-OVERRIDE: hard-coded dataset paths ---
import os

# Set this to the exact folder where your Flickr8k text files live.
dataset_text = '/content/drive/My Drive/Flickr8k_text'

# Exact filenames (edit if necessary)
train_list_file = dataset_text + '/Flickr_8k.trainImages.txt'
dev_list_file   = dataset_text + '/Flickr_8k.devImages.txt'
test_list_file  = dataset_text + '/Flickr_8k.testImages.txt'
descriptions_file = dataset_text + '/ExpertAnnotations.txt'

# Token file: a name starting with "._" is a macOS AppleDouble sidecar (metadata,
# not captions), so prefer the real token file whenever it exists. The "._" name
# is kept only as a fallback for a Drive folder that contains nothing else.
token_candidates = [
    dataset_text + '/Flickr8k.lemma.token.txt',
    dataset_text + '/._Flickr8k.lemma.token.txt',
]
token_file = next((p for p in token_candidates if os.path.exists(p)), token_candidates[0])

# features file (change if located elsewhere)
features_file = dataset_text + '/features.p'

print("\n--- Using hard-coded paths now ---")
print(" dataset_text      =", dataset_text)
print(" descriptions_file =", descriptions_file)
print(" token_file        =", token_file)
print(" features_file     =", features_file)

# Print directory contents so you can visually confirm the filenames in Colab
print("\nFiles inside dataset_text folder:")
try:
    for f in sorted(os.listdir(dataset_text)):
        print(' -', f)
except OSError as e:
    # Best-effort listing: a missing/unmounted folder is reported, not fatal.
    print('Could not list folder contents:', e)
    print('Double-check that you mounted Drive in Colab and that the folder path above is exact.')



--- Using hard-coded paths now ---
 dataset_text      = /content/drive/My Drive/Flickr8k_text
 descriptions_file = /content/drive/My Drive/Flickr8k_text/ExpertAnnotations.txt
 token_file        = /content/drive/My Drive/Flickr8k_text/._Flickr8k.lemma.token.txt
 features_file     = /content/drive/My Drive/Flickr8k_text/features.p

Files inside dataset_text folder:
Could not list folder contents: [Errno 2] No such file or directory: '/content/drive/My Drive/Flickr8k_text'
Double-check that you mounted Drive in Colab and that the folder path above is exact.


In [None]:
# --- Dataset setup: mount Google Drive and define the Flickr8k paths ---
from google.colab import drive
drive.mount('/content/drive')

# Drive folder that holds the Flickr8k text files (edit if yours is named differently).
dataset_text = '/content/drive/My Drive/Flickr8k_text'

# Image-id lists for the train / dev / test splits.
train_list_file, dev_list_file, test_list_file = (
    dataset_text + suffix
    for suffix in (
        '/Flickr_8k.trainImages.txt',
        '/Flickr_8k.devImages.txt',
        '/Flickr_8k.testImages.txt',
    )
)

# Annotations file, lemma-token captions file, and the pickled image features
# (point features_file elsewhere if features.p lives in another folder).
descriptions_file, token_file, features_file = (
    dataset_text + suffix
    for suffix in (
        '/ExpertAnnotations.txt',
        '/Flickr8k.lemma.token.txt',
        '/features.p',
    )
)


Mounted at /content/drive


In [None]:
# Loading a text file into memory
def load_doc(filename):
    """Read the text file at ``filename`` and return its full contents.

    Args:
        filename: path of the file to read (opened read-only, text mode,
            platform default encoding).

    Returns:
        The whole file as a single string.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises.
    with open(filename, 'r') as file:
        return file.read()
# get all imgs with their captions
def all_img_captions(filename):
    """Map every image id in a caption token file to its list of captions.

    Each non-blank line is expected to hold an image token and a caption
    separated by a TAB. The last two characters of the image token (the
    "#<digit>" caption index) are dropped to obtain the image id.

    Args:
        filename: path of the token file, read via ``load_doc``.

    Returns:
        dict mapping image id -> list of caption strings, in file order.

    Raises:
        ValueError: if a non-blank line contains no TAB separator.
    """
    descriptions = {}
    for line in load_doc(filename).split('\n'):
        # Skip blank lines instead of assuming exactly one trailing newline, so the
        # final caption is not lost when the file does not end with '\n'.
        if not line.strip():
            continue
        # Split on the first TAB only, so a TAB inside the caption text is kept.
        img, caption = line.split('\t', 1)
        descriptions.setdefault(img[:-2], []).append(caption)
    return descriptions
# Data cleaning: lower-casing, stripping punctuation, dropping short and non-alphabetic words
def cleaning_text(captions):
    """Normalise every caption in ``captions`` in place and return the same dict.

    For each caption: hyphens become spaces, words are lower-cased, punctuation
    is removed, and any word that is a single character or not purely
    alphabetic is discarded. The surviving words are re-joined with spaces.

    Args:
        captions: dict mapping image id -> list of caption strings (mutated).

    Returns:
        The same ``captions`` dict, now holding the cleaned strings.
    """
    strip_punct = str.maketrans('', '', string.punctuation)
    for caption_list in captions.values():
        for idx in range(len(caption_list)):
            kept = []
            for token in caption_list[idx].replace("-", " ").split():
                token = token.lower().translate(strip_punct)
                # Drops leftovers such as a lone "s"/"a" and anything containing digits.
                if len(token) > 1 and token.isalpha():
                    kept.append(token)
            caption_list[idx] = ' '.join(kept)
    return captions
def text_vocabulary(descriptions):
    """Return the set of unique whitespace-separated words across all captions.

    Args:
        descriptions: dict mapping image id -> list of caption strings.

    Returns:
        A set containing every distinct word.
    """
    return {
        word
        for caption_list in descriptions.values()
        for caption in caption_list
        for word in caption.split()
    }
#All descriptions in one file
def save_descriptions(descriptions, filename):
    """Write all captions to ``filename``, one "<image id>TAB<caption>" per line.

    Args:
        descriptions: dict mapping image id -> list of caption strings.
        filename: destination path; an existing file is overwritten.

    Lines are joined with newlines and no trailing newline is written.
    """
    lines = [
        key + '\t' + desc
        for key, desc_list in descriptions.items()
        for desc in desc_list
    ]
    # The context manager flushes and closes the file even if write() fails.
    with open(filename, "w") as file:
        file.write("\n".join(lines))
# Set these paths to match the project folder on your system.
# (dataset_text is defined in an earlier cell, so it is not reassigned here.)
dataset_images = "/content/drive/MyDrive/flicker8k/Images"
# Prepare the text data.
filename = token_file # caption token file path set in an earlier cell
# Load the token file and map every image id to its list of captions.
# NOTE(review): presumably 5 captions per image, as in Flickr8k — not checked here.
descriptions = all_img_captions(filename)
print("Length of descriptions =" ,len(descriptions))
# Clean the captions. cleaning_text mutates its argument in place and returns it,
# so `clean_descriptions` and `descriptions` refer to the same (cleaned) dict.
clean_descriptions = cleaning_text(descriptions)
# Build the vocabulary of unique words.
vocabulary = text_vocabulary(clean_descriptions)
print("Length of vocabulary = ", len(vocabulary))
# Save every cleaned caption to descriptions.txt inside the dataset_text folder.
save_descriptions(clean_descriptions, dataset_text + "/" +"descriptions.txt")

Length of descriptions = 8092
Length of vocabulary =  6375


In [None]:
def extract_features_in_batches(directory, save_path, batch_size=500):
    """Extract an Xception feature vector for every image in ``directory``.

    Images are processed in groups of ``batch_size``; after each group the
    features collected so far are pickled to ``save_path`` as a checkpoint.
    An image that fails to load or to run through the model is reported and
    skipped (best effort), so one bad file does not abort the whole run.

    Args:
        directory: folder containing .jpg/.jpeg/.png files.
        save_path: pickle file rewritten with the cumulative features after
            every batch.
        batch_size: number of images between checkpoints (must be >= 1).

    Returns:
        dict mapping image filename -> flattened 1-D feature array.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    model = Xception(include_top=False, pooling='avg')
    features = {}

    # Sorted so batches and checkpoints are reproducible; os.listdir order is arbitrary.
    all_imgs = sorted(
        f for f in os.listdir(directory) if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    # Ceiling division: the previous `n // batch_size + 1` over-counted by one
    # whenever the image count was an exact multiple of batch_size.
    total_batches = (len(all_imgs) + batch_size - 1) // batch_size

    for batch_no, start in enumerate(range(0, len(all_imgs), batch_size), start=1):
        batch = all_imgs[start:start + batch_size]
        print(f"\nProcessing batch {batch_no} / {total_batches}")

        for img in tqdm(batch):
            filename = os.path.join(directory, img)
            try:
                # Close the underlying file as soon as the pixels are decoded.
                with Image.open(filename) as raw:
                    image = raw.convert("RGB").resize((299,299))
                image = np.array(image, dtype=np.float32)
                image = np.expand_dims(image, axis=0)
                image = preprocess_input(image)
                feature = model.predict(image, verbose=0)
                features[img] = feature.reshape(-1)
            except Exception as e:
                # Deliberate best effort: log and carry on with the next image.
                print("Skipping", img, ":", e)

        # Save after each batch
        with open(save_path, "wb") as f:
            dump(features, f)
        print("💾 Saved checkpoint:", len(features))

    return features


In [None]:
# Source image folder and the pickle file that will receive the features
# (both live on Drive; adjust them to your own layout if needed).
dataset_images = "/content/drive/MyDrive/flicker8k/Images"
features_file = "/content/drive/MyDrive/flicker8k/features.p"

# Run the extraction over every image, checkpointing to features_file every 500 images.
features = extract_features_in_batches(
    directory=dataset_images,
    save_path=features_file,
    batch_size=500,
)


Processing batch 1 / 17


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for img in tqdm(batch):


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 500

Processing batch 2 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 1000

Processing batch 3 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 1500

Processing batch 4 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 2000

Processing batch 5 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 2500

Processing batch 6 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 3000

Processing batch 7 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 3500

Processing batch 8 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 4000

Processing batch 9 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 4500

Processing batch 10 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 5000

Processing batch 11 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 5500

Processing batch 12 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 6000

Processing batch 13 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 6500

Processing batch 14 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 7000

Processing batch 15 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 7500

Processing batch 16 / 17


  0%|          | 0/500 [00:00<?, ?it/s]

💾 Saved checkpoint: 8000

Processing batch 17 / 17


  0%|          | 0/91 [00:00<?, ?it/s]

💾 Saved checkpoint: 8091


In [None]:
from pickle import dump

# Destination pickle in Drive, so the features survive the Colab session.
save_path = "/content/drive/MyDrive/flicker8k/features.p"  # permanent file in Drive

# Serialise the in-memory `features` dict; the `with` block closes the file afterwards.
with open(save_path, "wb") as f:
    dump(features, f)

print("✅ Features saved to", save_path)

✅ Features saved to /content/drive/MyDrive/flicker8k/features.p


In [None]:
# Reload the pickled feature dict; the context manager closes the file handle,
# which the previous bare open() call never did.
# NOTE: pickle can execute arbitrary code while loading — only open a features
# file that you produced yourself.
with open(features_file, "rb") as features_fh:
    features = load(features_fh)

In [None]:
# Move a locally written /content/features.p into the Drive project folder.
# NOTE(review): the recorded output shows the source file did not exist when this ran,
# and the extraction cell in this notebook writes features.p straight to Drive —
# confirm whether this cell is still needed.
!mv /content/features.p "/content/drive/My Drive/Colab Notebooks/flicker8k"

mv: cannot stat '/content/features.p': No such file or directory


In [None]:
#load the data
def load_photos(filename):
    """Return the image ids listed one per line in ``filename``.

    Args:
        filename: path of an image-list file, read via ``load_doc``.

    Returns:
        List of non-empty image ids in file order, with surrounding
        whitespace removed.
    """
    # Filter blank lines rather than blindly dropping the last element, so the
    # final id is kept when the file does not end with a newline.
    return [line.strip() for line in load_doc(filename).split("\n") if line.strip()]
def load_clean_descriptions(filename, photos):
    """Load the cleaned captions of the requested images, wrapped in start/end markers.

    Each non-blank line of ``filename`` is split on whitespace: the first token
    is the image id and the remaining tokens form the caption.

    Args:
        filename: path of the cleaned descriptions file, read via ``load_doc``.
        photos: iterable of image ids to keep.

    Returns:
        dict mapping image id -> list of captions, each formatted as
        ``'<start> ' + caption + ' <end>'``.
    """
    # A set gives O(1) membership tests; checking against a list for every
    # caption line made this loop quadratic in practice.
    wanted = set(photos)
    descriptions = {}
    for line in load_doc(filename).split("\n"):
        words = line.split()
        if not words:
            continue
        image, image_caption = words[0], words[1:]
        if image not in wanted:
            continue
        desc = '<start> ' + " ".join(image_caption) + ' <end>'
        descriptions.setdefault(image, []).append(desc)
    return descriptions
def load_features(photos, features_path="/content/drive/MyDrive/flicker8k/features.p"):
    """Load pickled image features and keep only those for ``photos``.

    Args:
        photos: iterable of image ids to select.
        features_path: pickle file holding a dict of image id -> feature.
            Defaults to the location this notebook has always used, so
            existing one-argument calls behave as before.

    Returns:
        dict mapping each id in ``photos`` to its stored feature.

    Raises:
        OSError: if ``features_path`` cannot be opened.
        KeyError: if an id in ``photos`` has no stored feature.
    """
    # NOTE: pickle can execute arbitrary code while loading — only load a
    # features file that you produced yourself.
    with open(features_path, "rb") as fh:
        all_features = load(fh)
    #selecting only needed features
    return {k: all_features[k] for k in photos}
# Path of the training image list inside the dataset_text folder.
filename = dataset_text + "/" + "Flickr_8k.trainImages.txt"
# (older call kept for reference) train = loading_data(filename)
# Image ids that belong to the training split.
train_imgs = load_photos(filename)
# Cleaned captions for those images, read back from descriptions.txt and wrapped in <start>/<end>.
train_descriptions = load_clean_descriptions(dataset_text + "/" +"descriptions.txt", train_imgs)
# Image features for the same ids; load_features reads its own hard-coded features.p path.
train_features = load_features(train_imgs)

In [None]:
# Flatten the descriptions dictionary into a plain list of captions
def dict_to_list(descriptions):
    """Return every caption in ``descriptions`` as one flat list.

    Args:
        descriptions: dict mapping image id -> list of caption strings.

    Returns:
        List of captions in dict iteration order, each image's captions kept
        in their original order.
    """
    return [caption for caption_list in descriptions.values() for caption in caption_list]
# Create the tokenizer.
# It vectorises the text corpus: every distinct word in the captions
# is assigned an integer index, stored in the tokenizer's word_index dictionary.
#from keras_hub.tokenizers import Tokenizer
from tensorflow.keras.preprocessing.text import Tokenizer
def create_tokenizer(descriptions):
    """Fit a Tokenizer on every caption in ``descriptions`` and return it.

    Args:
        descriptions: dict mapping image id -> list of caption strings.

    Returns:
        A Tokenizer fitted on the flattened list of captions.
    """
    corpus = [caption for caption_list in descriptions.values() for caption in caption_list]
    fitted = Tokenizer()
    fitted.fit_on_texts(corpus)
    return fitted
# give each word an index, and store that into tokenizer.p pickle file
tokenizer = create_tokenizer(train_descriptions)

# Construct the full path for the tokenizer file using dataset_text
tokenizer_path = os.path.join(dataset_text, 'tokenizer.p')

# Save the tokenizer; the context manager guarantees the pickle is flushed and the
# handle closed (the previous bare open() left the file open).
with open(tokenizer_path, 'wb') as tokenizer_file:
    dump(tokenizer, tokenizer_file)

# +1 because word indices start at 1; index 0 is left for padding.
vocab_size = len(tokenizer.word_index) + 1
vocab_size

5520

In [None]:
#calculate maximum length of descriptions
def max_caption_length(descriptions):
    """Return the word count of the longest caption in ``descriptions``.

    Args:
        descriptions: dict mapping image id -> list of caption strings.

    Returns:
        Largest number of whitespace-separated words found in any caption.

    Raises:
        ValueError: if ``descriptions`` contains no captions at all.
    """
    return max(
        len(caption.split())
        for caption_list in descriptions.values()
        for caption in caption_list
    )

# The helper and its result use different names: previously the function was also
# called `max_length`, so this assignment replaced it with an int and re-running
# the cell failed with "TypeError: 'int' object is not callable".
# NOTE(review): this measures `descriptions` (all images, no <start>/<end> markers),
# not `train_descriptions` — confirm that is the intended sequence length.
max_length = max_caption_length(descriptions)
max_length

33

In [None]:
def data_generator(descriptions, features, tokenizer, max_length, vocab_size=None):
    """Endlessly yield training batches, one batch per image.

    For every image, each caption is encoded with ``tokenizer`` and expanded
    into (prefix -> next word) pairs. All pairs of all captions of that image
    are stacked into a single batch, so a caller that sets ``steps_per_epoch``
    to ``len(descriptions)`` sees every image exactly once per epoch.
    (Yielding one single pair per step, as before, meant such an epoch only
    covered the first ``steps_per_epoch`` pairs of the dataset.)

    Args:
        descriptions: dict mapping image id -> list of caption strings.
        features: dict mapping image id -> feature array; flattened to 1-D.
        tokenizer: fitted tokenizer providing ``texts_to_sequences`` and
            ``word_index``.
        max_length: length every input sequence is padded/truncated to.
        vocab_size: number of output classes. Defaults to
            ``len(tokenizer.word_index) + 1`` so the generator no longer
            depends on a module-level variable.

    Yields:
        ``((image_features, input_sequences), next_word_one_hot)`` where all
        three arrays share the same leading batch dimension.

    Raises:
        ValueError: if a full pass over ``descriptions`` produces no samples
            (otherwise the generator would spin forever without yielding).
    """
    if vocab_size is None:
        vocab_size = len(tokenizer.word_index) + 1

    while True:
        produced_any = False
        for key, description_list in descriptions.items():
            # Flatten the stored feature (e.g. shape (1, d) or (d,)) to a 1-D vector.
            feature = np.asarray(features[key]).reshape(-1)

            in_seqs, out_words = [], []
            for desc in description_list:
                # encode the sequence
                seq = tokenizer.texts_to_sequences([desc])[0]
                # split one sequence into multiple (prefix, next word) pairs
                for i in range(1, len(seq)):
                    in_seqs.append(seq[:i])
                    out_words.append(seq[i])

            # An image whose captions are all shorter than two tokens has no pairs.
            if not in_seqs:
                continue

            # One copy of the image feature per (prefix, next word) pair.
            image_batch = np.repeat(feature[np.newaxis, :], len(in_seqs), axis=0)
            seq_batch = pad_sequences(in_seqs, maxlen=max_length)
            word_batch = to_categorical(out_words, num_classes=vocab_size)
            produced_any = True
            yield (image_batch, seq_batch), word_batch

        if not produced_any:
            raise ValueError("data_generator: no training samples could be built from descriptions")

In [None]:
from keras.utils import plot_model
# define the captioning model
def define_model(vocab_size, max_length):
    """Build and compile the image-captioning model.

    Two inputs are merged by element-wise addition: a 2048-wide image feature
    vector projected to 256 units, and a padded word-index sequence passed
    through an embedding and an LSTM (256 units). A dense layer followed by a
    softmax over the vocabulary predicts the next word.

    Args:
        vocab_size: size of the embedding input and of the softmax output.
        max_length: length of the padded input sequences.

    Returns:
        The compiled model (categorical cross-entropy loss, Adam optimizer).
        As side effects the summary is printed and a diagram is written to
        'model.png'.
    """
    # features from the CNN model squeezed from 2048 to 256 nodes
    inputs1 = Input(shape=(2048,))
    fe1 = Dropout(0.5)(inputs1)
    fe2 = Dense(256, activation='relu')(fe1)
    # LSTM sequence model
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
    se2 = Dropout(0.5)(se1)
    se3 = LSTM(256, use_cudnn=False)(se2)
    # Merging both models
    decoder1 = add([fe2, se3])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)
    # tie it together [image, seq] [word]
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()
    plot_model(model, to_file='model.png', show_shapes=True)
    return model

In [None]:
# Train the captioning model: report the dataset sizes, build the model, then run
# `epochs` single-epoch fits, saving the model after each one.
print('Dataset: ', len(train_imgs))
print('Descriptions: train=', len(train_descriptions))
print('Photos: train=', len(train_features))
print('Vocabulary Size:', vocab_size)
print('Description Length: ', max_length)
model = define_model(vocab_size, max_length)
epochs = 10
# One step per training image: each fit call consumes `steps` items from the generator.
steps = len(train_descriptions)
# Creates a local "models" directory; note that the saves below write to Drive, not to it.
os.makedirs("models",exist_ok=True)
for i in range(epochs):
    # A fresh generator is created every epoch, so each epoch starts again from the
    # first entry of train_descriptions.
    generator = data_generator(train_descriptions, train_features, tokenizer, max_length)
    model.fit(generator, epochs=1, steps_per_epoch= steps, verbose=1)
    # Both files use the same path on every iteration, so each epoch overwrites the last.
    # NOTE(review): this folder is spelled "flickr8k" while other cells use "flicker8k" —
    # confirm the directory exists and is the intended one.
    model.save_weights("/content/drive/MyDrive/flickr8k/caption_weights_10.weights.h5")
    model.save("/content/drive/MyDrive/flickr8k/caption_model.keras") # full model in the .keras format
    print("✅ Model saved: weights (.h5) + full TF model")

Dataset:  6000
Descriptions: train= 6000
Photos: train= 6000
Vocabulary Size: 5520
Description Length:  33


None
[1m6000/6000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 4ms/step - loss: 6.2533
✅ Model saved: weights (.h5) + full TF model
[1m6000/6000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 4ms/step - loss: 4.5777
✅ Model saved: weights (.h5) + full TF model
[1m6000/6000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 4ms/step - loss: 4.2108
✅ Model saved: weights (.h5) + full TF model
[1m6000/6000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 4ms/step - loss: 3.9114
✅ Model saved: weights (.h5) + full TF model
[1m6000/6000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 4ms/step - loss: 3.7931
✅ Model saved: weights (.h5) + full TF model
[1m6000/6000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 4ms/step - loss: 3.5487
✅ Model saved: weights (.h5) + full TF model
[1m6000/6000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 4ms/step - loss: 3.3000
✅ Model saved: weights (.h5) + full TF model
[1m6000/6000[0m [32m━━━━━━

In [None]:
# Only re-prints the confirmation message; this cell does not write anything to disk.
print("✅ Model saved: weights (.h5) + full TF model")

✅ Model saved: weights (.h5) + full TF model
