In [18]:
# === Cell A (top) ===
import os
import tensorflow as tf
import shutil

# folder for checkpoints
os.makedirs('saved_models_tf', exist_ok=True)

# Callback: save a .keras single-file each epoch (Keras-3 friendly).
# Optionally also try model.export(...) if available (for TF-serving), but .keras is primary.
class KerasEpochSaver(tf.keras.callbacks.Callback):
    def __init__(self, folder='saved_models_tf', keep_last=3):
        super().__init__()
        self.folder = folder
        os.makedirs(self.folder, exist_ok=True)
        self.keep_last = keep_last

    def on_epoch_end(self, epoch, logs=None):
        base = os.path.join(self.folder, f'epoch_{epoch+1}')
        # remove previous directory for this epoch (optional)
        if os.path.exists(base):
            shutil.rmtree(base)
        # try to export TF-style directory (optional)
        try:
            # available in some Keras 3 builds
            self.model.export(base)
            print(f"Exported TF artifact: {base}")
        except Exception:
            pass
        # ALWAYS save a single-file .keras (Keras-3 native)
        keras_path = base + '.keras'
        try:
            self.model.save(keras_path)   # writes single .keras file
            print(f"Saved .keras checkpoint: {keras_path}")
        except Exception as e:
            print("Failed to save .keras checkpoint:", e)

        # optional: keep only last N .keras files to limit disk
        files = sorted([p for p in os.listdir(self.folder) if p.endswith('.keras')],
                       key=lambda n: os.path.getmtime(os.path.join(self.folder, n)))
        if len(files) > self.keep_last:
            to_rm = files[:len(files) - self.keep_last]
            for rm in to_rm:
                try:
                    os.remove(os.path.join(self.folder, rm))
                except Exception:
                    pass

# instantiate and export the callback (use this in model.fit)
save_callback = KerasEpochSaver(folder='saved_models_tf', keep_last=3)

# CONTROL SWITCH
# True  = force fresh start
# False = try to auto-resume from latest .keras (preferred)
START_FRESH = False


In [21]:
# Run in terminal or in a notebook cell with a leading !
!pip install -q tensorflow pillow tqdm nltk



[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [22]:
# Cell 1 - Explanation + imports
# Simple explanation: we'll use TensorFlow for the model, PIL for images, nltk for simple text tools.
import os, json, random, pickle
from glob import glob
from tqdm import tqdm
import numpy as np
from PIL import Image

import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

import nltk
nltk.download('punkt')
from nltk.translate.bleu_score import corpus_bleu

print("All imports done. TF version:", tf.__version__)


All imports done. TF version: 2.19.0


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\yazda\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Cell 2 — Paths and basic settings

What this does: tells the notebook where your images and captions are, and sets a few training parameters.
Edit paths here if your folders are in different places.

In [23]:
# Cell 2 - Paths & basic settings (edit if your folders are elsewhere)
IMAGES_DIR = r'E:\EVOASTRA INTERNSHIP\Major_Project [ Caption Generation ]_16_11_2025\Major_Project_Caption_Generation_118k\train2017\train2017'                     # folder with the COCO images
ANNOTATIONS_FILE = r'E:\EVOASTRA INTERNSHIP\Major_Project [ Caption Generation ]_16_11_2025\Major_Project_Caption_Generation_118k\annotations_trainval2017\annotations\captions_train2017.json'  # COCO captions json
FEATURES_DIR = './features'                           # where we will save image features
TOKENIZER_PATH = './tokenizer.pickle'
# MODEL_PATH = './caption_model.h5'
MODEL_PATH = './caption_model.keras'
os.makedirs(FEATURES_DIR, exist_ok=True)

# Simple hyperparameters (beginners can try these)
IMG_SIZE = (299, 299)   # input size for InceptionV3
MAX_VOCAB = 5000        # max number of words (keeps memory low)
MAX_LENGTH = 30         # max caption length (words)
EMBED_DIM = 256
UNITS = 256
BATCH_SIZE = 64
EPOCHS = 20              # low number to test quickly; increase later
LEARNING_RATE = 1e-4

# Subset option (set True to test quickly)
USE_SUBSET = False
SUBSET_SIZE = 500       # number of images to use if USE_SUBSET=True


Cell 3 — Load captions and create a mapping image -> captions

What this does: reads the JSON caption file and creates a dictionary where each image filename maps to a list of captions (strings).

In [24]:
# Cell 3 - load captions and map image file -> list of captions
with open(ANNOTATIONS_FILE, 'r') as f:
    ann = json.load(f)

# Map COCO image id -> filename
imageid2file = {im['id']: im['file_name'] for im in ann['images']}

# Build filename -> captions
img2caps = {}
for a in ann['annotations']:
    fname = imageid2file[a['image_id']]
    cap = a['caption'].strip().lower()
    img2caps.setdefault(fname, []).append(cap)

# Optionally take a small subset for quick testing
all_fnames = list(img2caps.keys())
if USE_SUBSET:
    chosen = set(random.sample(all_fnames, min(SUBSET_SIZE, len(all_fnames))))
    img2caps = {k: v for k, v in img2caps.items() if k in chosen}

print("Number of images available:", len(img2caps))
# Show an example
sample = next(iter(img2caps.items()))
print("Example image:", sample[0])
print("First 2 captions:", sample[1][:2])


Number of images available: 118287
Example image: 000000203564.jpg
First 2 captions: ['a bicycle replica with a clock as the front wheel.', 'the bike has a clock as a tire.']


Cell 4 — Helper: load image & preprocess for InceptionV3

What this does: defines a small function that loads an image, resizes it, and applies InceptionV3 preprocessing.

In [25]:
# Cell 4 - helper to load and preprocess a single image
def load_and_preprocess_image(path):
    img = Image.open(path).convert('RGB').resize(IMG_SIZE)
    arr = np.array(img).astype('float32')
    arr = preprocess_input(arr)  # scale, normalize like InceptionV3 expects
    return arr


Cell 5 — Extract and save image features (run once)

What this does: uses pre-trained InceptionV3 (without the final classification head) to convert each image into a fixed-length feature vector and saves it to disk.
Run this cell once — it may take time depending on number of images.

In [57]:
 # Cell 5 - extract pooled features and save as .npy files
base_model = InceptionV3(include_top=False, weights='imagenet', pooling='avg')  # pooling='avg' -> one vector per image
base_model.trainable = False
print("Feature dimension:", base_model.output_shape[1])

for fname in tqdm(img2caps.keys(), desc="Extracting features"):
    image_id = os.path.splitext(fname)[0]
    out_path = os.path.join(FEATURES_DIR, image_id + '.npy')
    if os.path.exists(out_path):
        continue
    img_path = os.path.join(IMAGES_DIR, fname)
    if not os.path.exists(img_path):
        # skip if image file missing
        continue
    arr = load_and_preprocess_image(img_path)
    arr = np.expand_dims(arr, 0)
    feat = base_model.predict(arr, verbose=0)[0]   # shape (feature_dim,)
    np.save(out_path, feat)
print("Feature extraction done. Saved in", FEATURES_DIR)


Feature dimension: 2048


Extracting features: 100%|██████████| 118287/118287 [00:06<00:00, 17266.89it/s]

Feature extraction done. Saved in ./features





Cell 5.1 extracted feature vectors

In [None]:
# === Cell 5.1 extracted feature vectors ===
import numpy as np
import random
import os

# Pick some random image IDs from train_ids
sample_ids = random.sample(train_ids, 3)

for img_id in sample_ids:
    feat_path = os.path.join(FEATURES_DIR, img_id + '.npy')
    if os.path.exists(feat_path):
        feat = np.load(feat_path)
        print(f"\nImage ID: {img_id}")
        print("Feature vector shape:", feat.shape)
        print("First 10 feature values:", feat[:10])
    else:
        print(f"Feature file missing for {img_id}")



Image ID: 000000315805
Feature vector shape: (2048,)
First 10 feature values: [0.1403391  0.58434105 0.26905352 0.3974986  0.625443   0.21929707
 1.0755721  0.31673926 0.01581189 0.31125978]

Image ID: 000000162358
Feature vector shape: (2048,)
First 10 feature values: [0.31865853 0.16279033 0.92625237 0.390826   0.48552263 0.2658648
 0.19599834 0.16347247 0.02084418 0.39156142]

Image ID: 000000304684
Feature vector shape: (2048,)
First 10 feature values: [0.20918494 0.15040675 0.2227888  0.02145237 0.0744825  0.64717346
 0.6128953  0.17526665 0.4107612  0.4435931 ]


Cell 6 — Prepare captions: add <start> and <end>, build tokenizer
What this does: for each caption we add <start> and <end>, then fit a Keras tokenizer (word → integer). We save tokenizer to disk.

In [27]:
# Cell 6 - prepare captions and tokenizer
all_captions = []
image_ids = []   # parallel list: each caption has the image id it belongs to

for fname, caps in img2caps.items():
    image_id = os.path.splitext(fname)[0]
    feat_file = os.path.join(FEATURES_DIR, image_id + '.npy')
    if not os.path.exists(feat_file):
        continue
    for c in caps:
        text = '<start> ' + c + ' <end>'
        all_captions.append(text)
        image_ids.append(image_id)

print("Total captions available:", len(all_captions))

# tokenizer: convert words to integers
tokenizer = Tokenizer(num_words=MAX_VOCAB, oov_token='<unk>', filters='!"#$%&()*+.,-/:;=?@[\]^_`{|}~ ')
tokenizer.fit_on_texts(all_captions)
with open(TOKENIZER_PATH, 'wb') as f:
    pickle.dump(tokenizer, f)

vocab_size = min(MAX_VOCAB, len(tokenizer.word_index) + 1)
print("Vocab size (used):", vocab_size)


  tokenizer = Tokenizer(num_words=MAX_VOCAB, oov_token='<unk>', filters='!"#$%&()*+.,-/:;=?@[\]^_`{|}~ ')


Total captions available: 591753
Vocab size (used): 5000


Cell 7 — Convert captions to sequences and make training lists
What this does: converts caption strings to integer sequences and pairs them with their image feature ids. This prepares the data we will feed to the model.

In [28]:
# Cell 7 - convert captions to sequences and prepare training examples
sequences = tokenizer.texts_to_sequences(all_captions)

# Truncate or pad later; but first map image -> list of sequences
from collections import defaultdict
imgid2seqs = defaultdict(list)
for img_id, seq in zip(image_ids, sequences):
    if len(seq) > MAX_LENGTH:
        seq = seq[:MAX_LENGTH]  # simple truncation for very long captions
    imgid2seqs[img_id].append(seq)

# train/val split by image ids
img_ids = list(imgid2seqs.keys())
random.shuffle(img_ids)
split = int(0.8 * len(img_ids))
train_ids = img_ids[:split]
val_ids = img_ids[split:]
print("Train images:", len(train_ids), "Validation images:", len(val_ids))


Train images: 94629 Validation images: 23658


Cell 8 — Simple data generator (yields batches)

What this does: creates a generator that yields batches of image feature vectors, input sequences (padded), and target sequences (padded). We use "teacher forcing" where the model sees the true preceding words during training.

In [29]:
# --- Replacement Cell 8: CaptionSequence (use instead of data_generator) ---
import math
from tensorflow.keras.utils import Sequence

class CaptionSequence(Sequence):
    """
    Keras Sequence to yield batches of ([features, in_seqs], out_seqs)
    Loads image features from .npy files on the fly to keep memory low.
    """
    def __init__(self, image_ids, batch_size=BATCH_SIZE, shuffle=True):
        self.image_ids = list(image_ids)
        self.batch_size = batch_size
        self.shuffle = shuffle
        # build list of samples: (img_id, in_padded, out_padded)
        self.samples = []
        for img_id in self.image_ids:
            feat_path = os.path.join(FEATURES_DIR, img_id + '.npy')
            if not os.path.exists(feat_path):
                continue
            for seq in imgid2seqs[img_id]:
                in_seq = seq[:-1]
                out_seq = seq[1:]
                in_padded = pad_sequences([in_seq], maxlen=MAX_LENGTH, padding='post')[0]
                out_padded = pad_sequences([out_seq], maxlen=MAX_LENGTH, padding='post')[0]
                self.samples.append((img_id, in_padded, out_padded))
        self.on_epoch_end()

    def __len__(self):
        return math.ceil(len(self.samples) / self.batch_size)

    def __getitem__(self, idx):
        # build batch
        batch_samples = self.samples[idx * self.batch_size : (idx + 1) * self.batch_size]
        X_feats = []
        X_seq = []
        Y_seq = []
        for (img_id, in_padded, out_padded) in batch_samples:
            feat = np.load(os.path.join(FEATURES_DIR, img_id + '.npy'))
            X_feats.append(feat)
            X_seq.append(in_padded)
            Y_seq.append(out_padded)
        # convert to numpy arrays
        X_feats = np.array(X_feats)           # shape (B, feature_dim)
        X_seq = np.array(X_seq)               # shape (B, MAX_LENGTH)
        Y_seq = np.array(Y_seq)               # shape (B, MAX_LENGTH)
        return [X_feats, X_seq], Y_seq

    def on_epoch_end(self):
        if self.shuffle:
            random.shuffle(self.samples)


Cell 9 — Build a simple model (encoder = feature vector; decoder = embedding + LSTM)

What this does: defines the Keras model. We take image features, pass them through a dense layer, concatenate them to each timestep embedding, then LSTM predicts a distribution over words for every timestep.

In [31]:
# Replacement Cell 9 - re-build model with mask_zero=False and ready for custom masked loss
from tensorflow.keras.layers import RepeatVector, Concatenate, Dropout

# get feature vector size from one saved .npy file
sample_feat_file = next(iter(glob(os.path.join(FEATURES_DIR, '*.npy'))))
feature_dim = np.load(sample_feat_file).shape[0]
vocab_size_used = vocab_size  # computed earlier

# Model inputs
feat_input = Input(shape=(feature_dim,), name='image_features')   # one vector per image
seq_input = Input(shape=(MAX_LENGTH,), name='input_seq')         # token sequence input

# Map image feature to EMBED_DIM and repeat it MAX_LENGTH times using RepeatVector
feat_dense = Dense(EMBED_DIM, activation='relu', name='feat_dense')(feat_input)  # (batch, EMBED_DIM)
feat_tiled = RepeatVector(MAX_LENGTH, name='feat_tiled')(feat_dense)            # (batch, MAX_LENGTH, EMBED_DIM)

# Embedding for tokens - IMPORTANT: mask_zero=False to avoid mask broadcasting issues
emb = Embedding(vocab_size_used, EMBED_DIM, mask_zero=False, name='token_embedding')(seq_input)  # (batch,MAX,embed)

# Concatenate feature (tiled) with token embedding at each timestep
decoder_input = Concatenate(axis=-1, name='concat_feat_token')([emb, feat_tiled])  # (batch,MAX, 2*EMBED)

# LSTM and output
lstm = LSTM(UNITS, return_sequences=True, name='decoder_lstm')(decoder_input)
drop = Dropout(0.5, name='dropout')(lstm)
outputs = Dense(vocab_size_used, activation='softmax', name='output_dense')(drop)

model = Model(inputs=[feat_input, seq_input], outputs=outputs, name='SimpleImageCaptionModel_v2')

# Define masked loss (ignores padding token 0 in y_true)
import tensorflow as tf
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False, reduction='none')
def masked_loss(y_true, y_pred):
    # y_true: (batch, MAX_LENGTH), y_pred: (batch, MAX_LENGTH, vocab)
    loss_ = loss_object(y_true, y_pred)                    # (batch, MAX_LENGTH)
    mask = tf.cast(tf.not_equal(y_true, 0), tf.float32)    # 1.0 for real tokens, 0.0 for padding
    loss_ = loss_ * mask
    # avoid dividing by zero
    denom = tf.reduce_sum(mask)
    return tf.reduce_sum(loss_) / (denom + 1e-7)

model.compile(optimizer=Adam(LEARNING_RATE), loss=masked_loss)
model.summary()


Cell 10 — Train the model (quick run)

What this does: creates training and validation generators and runs .fit() for a few epochs. Keep EPOCHS small at first.

In [34]:
# === CELL 10 (FINAL — with robust validation repeat) ===
import os
import re
import glob
import math
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences

# -------------------------
# Detect feature dim
# -------------------------
sample_feat_path_iter = iter(glob.glob(os.path.join(FEATURES_DIR, '*.npy')))
sample_feat = np.load(next(sample_feat_path_iter))
FEATURE_DIM = sample_feat.shape[0]
print("FEATURE_DIM detected:", FEATURE_DIM)

# -------------------------
# Data generator
# -------------------------
def gen_for_ids(image_id_list):
    for img_id in image_id_list:
        feat_path = os.path.join(FEATURES_DIR, img_id + '.npy')
        if not os.path.exists(feat_path):
            continue
        feat = np.load(feat_path).astype('float32')
        for seq in imgid2seqs[img_id]:
            in_seq = seq[:-1]
            out_seq = seq[1:]
            in_padded = pad_sequences([in_seq], maxlen=MAX_LENGTH, padding='post')[0].astype('int32')
            out_padded = pad_sequences([out_seq], maxlen=MAX_LENGTH, padding='post')[0].astype('int32')
            yield feat, in_padded, out_padded

# TensorSpecs
feature_spec = tf.TensorSpec(shape=(FEATURE_DIM,), dtype=tf.float32)
inseq_spec = tf.TensorSpec(shape=(MAX_LENGTH,), dtype=tf.int32)
outseq_spec = tf.TensorSpec(shape=(MAX_LENGTH,), dtype=tf.int32)
output_signature = (feature_spec, inseq_spec, outseq_spec)

# Build initial datasets (will be rebuilt before fit)
train_ds = tf.data.Dataset.from_generator(lambda: gen_for_ids(train_ids), output_signature=output_signature)
val_ds   = tf.data.Dataset.from_generator(lambda: gen_for_ids(val_ids),   output_signature=output_signature)

def map_fn(feat, inseq, outseq):
    return ((feat, inseq), outseq)

train_ds = train_ds.map(map_fn, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.shuffle(1000).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

val_ds = val_ds.map(map_fn, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Sanity check
for (x_batch, y_batch) in train_ds.take(1):
    print("Feature batch shape:", x_batch[0].shape)
    print("Input seq batch shape:", x_batch[1].shape)
    print("Target batch shape:", y_batch.shape)

# -------------------------
# Checkpoint discovery
# -------------------------
CKPT_DIR = 'saved_models_tf'
latest_epoch = 0
latest_path = None

if os.path.exists(CKPT_DIR):
    for name in os.listdir(CKPT_DIR):
        m = re.match(r'epoch_(\d+)(?:\.keras)?$', name)
        if m:
            epoch_num = int(m.group(1))
            candidate = os.path.join(CKPT_DIR, name)
            # prefer single-file .keras if present
            if name.endswith('.keras'):
                if epoch_num >= latest_epoch:
                    latest_epoch = epoch_num
                    latest_path = candidate
            else:
                keras_sibling = candidate + '.keras'
                if os.path.exists(keras_sibling) and epoch_num >= latest_epoch:
                    latest_epoch = epoch_num
                    latest_path = keras_sibling

# -------------------------
# Load checkpoint + RERUN option
# -------------------------
initial_epoch = 0

if latest_path is not None and not START_FRESH:
    print(f"\nFound checkpoint: {latest_path} (saved epoch {latest_epoch})")
    # Try to load with compile=True to restore optimizer & training config
    try:
        model = tf.keras.models.load_model(latest_path, compile=True, custom_objects={"masked_loss": masked_loss})
        print("Loaded checkpoint with compile=True (optimizer state restored).")
    except Exception as e:
        print("Load with compile=True failed:", repr(e))
        print("Falling back to load_model(..., compile=False) and manual compile.")
        model = tf.keras.models.load_model(latest_path, compile=False)
        # safe manual compile fallback
        try:
            optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
            loss_fn = masked_loss
        except Exception:
            optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
            loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
            print("masked_loss not found — using SparseCategoricalCrossentropy fallback for manual compile.")
        model.compile(optimizer=optimizer, loss=loss_fn, metrics=["accuracy"])
        print("Loaded without optimizer state; model compiled manually.")

    # RERUN option: set to True to re-run the saved epoch, False to resume next epoch
    RERUN_LAST = False   # <-- set False for normal resume (start from next epoch)
    if RERUN_LAST:
        initial_epoch = max(0, latest_epoch - 1)
    else:
        initial_epoch = latest_epoch

    # Friendly message
    if RERUN_LAST:
        print(f"Re-running the most recently completed epoch: epoch {initial_epoch + 1}")
    else:
        print(f"Resuming training from the next epoch: epoch {initial_epoch + 1}")

else:
    print("\nNo checkpoint found or START_FRESH=True. Starting from scratch.")
    initial_epoch = 0

# -------------------------
# Training / steps per epoch (dynamic)
# -------------------------
try:
    num_samples = len(train_ids) * 5   # 5 captions per image
    num_images = len(train_ids)
except Exception:
    num_images = 5000
    num_samples = num_images * 5

STEPS_PER_EPOCH = math.ceil(num_samples / BATCH_SIZE)

print("\n===== DATASET SUMMARY =====")
print(f"Images in training split:    {num_images}")
print(f"Captions per image:          5")
print(f"Total caption samples:       {num_samples}")
print(f"Batch size:                  {BATCH_SIZE}")
print(f"→ Steps per epoch:           {STEPS_PER_EPOCH}")
print("============================\n")

# If initial_epoch >= EPOCHS we skip training (already finished)
if initial_epoch >= EPOCHS:
    print(f"Checkpoint shows training already complete up to epoch {initial_epoch}. No further training.")
    try:
        model.save(MODEL_PATH)
        print("Final model saved to:", MODEL_PATH)
    except Exception as e:
        print("Final save failed:", e)
else:
    # ---------- Rebuild train_ds and val_ds right before training (prevents earlier iterations from exhausting them) ----------
    train_ds = tf.data.Dataset.from_generator(lambda: gen_for_ids(train_ids), output_signature=output_signature)
    train_ds = train_ds.map(map_fn, num_parallel_calls=tf.data.AUTOTUNE)
    train_ds = train_ds.shuffle(1000).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    val_ds = tf.data.Dataset.from_generator(lambda: gen_for_ids(val_ids), output_signature=output_signature)
    val_ds = val_ds.map(map_fn, num_parallel_calls=tf.data.AUTOTUNE)
    val_ds = val_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    # ---------- Optional quick debug: count batches available (without repeat) ----------
    # debug_count = 0
    # for _ in train_ds.take(10000):
    #     debug_count += 1
    # print(f"DEBUG: batches available (without repeat): {debug_count}  — expected: {STEPS_PER_EPOCH}")

    # if debug_count < STEPS_PER_EPOCH:
    #     print("⚠ Note: dataset produces fewer batches than expected. .repeat() will fill remaining steps_per_epoch.")

    # ---------- Fit using repeat() so epochs always have STEPS_PER_EPOCH batches and val stable ----------
    VAL_SAMPLES = len(val_ids) * 5
    VALIDATION_STEPS = math.ceil(VAL_SAMPLES / BATCH_SIZE)

    history = model.fit(
        train_ds.repeat(),            # ensures the dataset restarts if it ends early
        validation_data=val_ds.repeat(),
        validation_steps=VALIDATION_STEPS,
        epochs=EPOCHS,
        initial_epoch=initial_epoch,
        callbacks=[save_callback],
        steps_per_epoch=STEPS_PER_EPOCH
    )

    # ---------- Final save ----------
    try:
        model.save(MODEL_PATH)
        print("Training finished and model saved to:", MODEL_PATH)
    except Exception as e:
        print("Final save failed:", e)


FEATURE_DIM detected: 2048
Feature batch shape: (64, 2048)
Input seq batch shape: (64, 30)
Target batch shape: (64, 30)

Found checkpoint: saved_models_tf\epoch_5.keras (saved epoch 5)
Loaded checkpoint with compile=True (optimizer state restored).
Resuming training from the next epoch: epoch 6

===== DATASET SUMMARY =====
Images in training split:    94629
Captions per image:          5
Total caption samples:       473145
Batch size:                  64
→ Steps per epoch:           7393

Epoch 6/20
[1m7393/7393[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 514ms/step - loss: 2.8035INFO:tensorflow:Assets written to: saved_models_tf\epoch_6\assets


INFO:tensorflow:Assets written to: saved_models_tf\epoch_6\assets


Saved artifact at 'saved_models_tf\epoch_6'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_6
Saved .keras checkpoint: saved_models_tf\epoc

INFO:tensorflow:Assets written to: saved_models_tf\epoch_7\assets


Saved artifact at 'saved_models_tf\epoch_7'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_7
Saved .keras checkpoint: saved_models_tf\epoc

INFO:tensorflow:Assets written to: saved_models_tf\epoch_8\assets


Saved artifact at 'saved_models_tf\epoch_8'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_8
Saved .keras checkpoint: saved_models_tf\epoc

INFO:tensorflow:Assets written to: saved_models_tf\epoch_9\assets


Saved artifact at 'saved_models_tf\epoch_9'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_9
Saved .keras checkpoint: saved_models_tf\epoc

INFO:tensorflow:Assets written to: saved_models_tf\epoch_10\assets


Saved artifact at 'saved_models_tf\epoch_10'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_10
Saved .keras checkpoint: saved_models_tf\ep

INFO:tensorflow:Assets written to: saved_models_tf\epoch_11\assets


Saved artifact at 'saved_models_tf\epoch_11'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_11
Saved .keras checkpoint: saved_models_tf\ep

INFO:tensorflow:Assets written to: saved_models_tf\epoch_12\assets


Saved artifact at 'saved_models_tf\epoch_12'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_12
Saved .keras checkpoint: saved_models_tf\ep

INFO:tensorflow:Assets written to: saved_models_tf\epoch_13\assets


Saved artifact at 'saved_models_tf\epoch_13'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_13
Saved .keras checkpoint: saved_models_tf\ep

INFO:tensorflow:Assets written to: saved_models_tf\epoch_14\assets


Saved artifact at 'saved_models_tf\epoch_14'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_14
Saved .keras checkpoint: saved_models_tf\ep

INFO:tensorflow:Assets written to: saved_models_tf\epoch_15\assets


Saved artifact at 'saved_models_tf\epoch_15'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_15
Saved .keras checkpoint: saved_models_tf\ep

INFO:tensorflow:Assets written to: saved_models_tf\epoch_16\assets


Saved artifact at 'saved_models_tf\epoch_16'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_16
Saved .keras checkpoint: saved_models_tf\ep

INFO:tensorflow:Assets written to: saved_models_tf\epoch_17\assets


Saved artifact at 'saved_models_tf\epoch_17'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_17
Saved .keras checkpoint: saved_models_tf\ep

INFO:tensorflow:Assets written to: saved_models_tf\epoch_18\assets


Saved artifact at 'saved_models_tf\epoch_18'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_18
Saved .keras checkpoint: saved_models_tf\ep

INFO:tensorflow:Assets written to: saved_models_tf\epoch_19\assets


Saved artifact at 'saved_models_tf\epoch_19'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_19
Saved .keras checkpoint: saved_models_tf\ep

INFO:tensorflow:Assets written to: saved_models_tf\epoch_20\assets


Saved artifact at 'saved_models_tf\epoch_20'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 2048), dtype=tf.float32, name='image_features'), TensorSpec(shape=(None, 30), dtype=tf.float32, name='input_seq')]
Output Type:
  TensorSpec(shape=(None, 30, 5000), dtype=tf.float32, name=None)
Captures:
  2298205755280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205755856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298240760528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205754128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2298205756240: TensorSpec(shape=(), dtype=tf.resource, name=None)
Exported TF artifact: saved_models_tf\epoch_20
Saved .keras checkpoint: saved_models_tf\ep

Cell 11 — Helper: convert sequence of token ids back to text

What this does: loads tokenizer and defines a utility to convert numeric sequences back to readable words.

In [43]:
# Cell 11 - load tokenizer and helper function
with open(TOKENIZER_PATH, 'rb') as f:
    tokenizer = pickle.load(f)

index_word = {v: k for k, v in tokenizer.word_index.items()}

def seq_to_text(seq):
    words = []
    for idx in seq:
        if idx == 0:
            continue
        word = index_word.get(idx, '')
        if word == '<end>':
            break
        words.append(word)
    return ' '.join(words)


Cell 12 — Inference: greedy caption generation for one image

What this does: given an image file id (e.g., '000000012345'), this function generates a caption word-by-word greedily (chooses most probable next word). Example usage included.

In [55]:
# Cell 12 - fixed greedy inference (uses full history)
def generate_caption(image_id, max_len=MAX_LENGTH):
    # 1) load feature
    feat_path = os.path.join(FEATURES_DIR, image_id + '.npy')
    if not os.path.exists(feat_path):
        raise FileNotFoundError(f"Feature file not found for: {feat_path}")
    feat = np.load(feat_path).astype('float32').reshape(1, -1)

    # 2) setup tokens
    start_id = tokenizer.word_index.get('<start>')
    end_id   = tokenizer.word_index.get('<end>')
    in_tokens = [start_id]   # sequence we feed (grows every step)

    for _ in range(max_len):
        # pad current sequence
        in_seq = pad_sequences([in_tokens], maxlen=MAX_LENGTH, padding='post')

        # 3) predict distribution over vocab for all timesteps
        preds = model.predict([feat, in_seq], verbose=0)  # (1, MAX_LENGTH, vocab)

        # we care about the last real timestep = len(in_tokens) - 1
        step = len(in_tokens) - 1
        token_probs = preds[0, step]           # shape (vocab_size,)
        next_id = int(np.argmax(token_probs))  # greedy

        # 4) stopping conditions
        if next_id == 0:              # pad
            break
        if end_id is not None and next_id == end_id:
            break

        # 5) append token
        in_tokens.append(next_id)

    # 6) convert ids → words, skipping <start>/<end>/0
    words = []
    for idx in in_tokens[1:]:  # skip <start>
        if idx == 0:
            break
        if end_id is not None and idx == end_id:
            break
        w = index_word.get(idx, '')
        if w in ('<start>', '<end>', ''):
            continue
        words.append(w)

    return ' '.join(words)

# Quick test on a random validation image
if len(val_ids) > 0:
    test_img = random.choice(val_ids)
    print("Image ID:", test_img)
    print("Generated caption:", generate_caption(test_img))
    print("Reference captions:", img2caps.get(test_img + '.jpg', [])[:3])
else:
    print("No validation images available for test.")


Image ID: 000000061675
Generated caption: a herd of sheep grazing on a lush green field
Reference captions: ['some white and black sheep are on some grass hills and trees', 'some sheep standing together while surrounded by some tall grass', 'a herd of sheep standing on a lush green hillside.']


In [46]:
import nltk
nltk.download('punkt_tab')   # downloads the punkt_tab data
nltk.download('punkt')       # safe to run again (already up-to-date)
print("Downloaded punkt_tab and punkt. Now re-run your BLEU evaluation cell.")


Downloaded punkt_tab and punkt. Now re-run your BLEU evaluation cell.


[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\yazda\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\yazda\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Cell 13 — Quick BLEU evaluation (small sample)

What this does: computes BLEU-1 and BLEU-4 on a small sample of validation images to get a rough numeric measure of quality.

In [56]:
# Cell 13 - quick BLEU check on small sample
def evaluate_bleu(n=50):
    sample = random.sample(val_ids, min(n, len(val_ids)))
    refs = []
    hyps = []
    for img_id in sample:
        references = [nltk.word_tokenize(c) for c in img2caps.get(img_id + '.jpg', [])]
        pred = generate_caption(img_id)
        hyp = nltk.word_tokenize(pred)
        refs.append(references)
        hyps.append(hyp)
    bleu1 = corpus_bleu(refs, hyps, weights=(1,0,0,0))
    bleu4 = corpus_bleu(refs, hyps, weights=(0.25,0.25,0.25,0.25))
    print(f"BLEU-1: {bleu1:.4f}, BLEU-4: {bleu4:.4f}")

# Run evaluation
evaluate_bleu(30)


BLEU-1: 0.6424, BLEU-4: 0.2137


Cell 14 — Save tokenizer and final notes

What this does: ensures tokenizer is saved for later use and gives tips.

In [52]:
# Cell 14 - save tokenizer
with open(TOKENIZER_PATH, 'wb') as f:
    pickle.dump(tokenizer, f)
print("Tokenizer saved to", TOKENIZER_PATH)
print("Done. Tips:\n- Increase EPOCHS and MAX_VOCAB for better results.")


Tokenizer saved to ./tokenizer.pickle
Done. Tips:
- Increase EPOCHS and MAX_VOCAB for better results.
