In [31]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img,img_to_array
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
# Load the saved captioning model from 'model.keras'. It is later passed to
# generate_caption() as the `caption_model` argument.
model = tf.keras.models.load_model('model.keras')

In [5]:
# Load the saved feature-extractor model from 'fe.keras'. It is later passed to
# generate_caption() as the `feature_extractor` argument.
fe = tf.keras.models.load_model('fe.keras')

In [7]:
# NOTE(review): this import lives in a later cell than the other imports;
# consider moving it to the notebook's import cell.
import pickle
# Restore the tokenizer object that was serialized to 'tokenizer.pkl'. It is
# later passed to generate_caption() as the `tokenizer` argument.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so only
# load 'tokenizer.pkl' if it comes from a trusted source.
with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

In [23]:
def generate_caption(caption_model, tokenizer, feature_extractor, image_path, max_length):
    """
    Generate a caption for a given image using greedy (argmax) decoding.

    Parameters:
        caption_model: The trained captioning model. Its ``predict`` method is
            called with ``[image_features, padded_sequence]`` and the argmax of
            the returned scores is taken as the next word index.
        tokenizer: The tokenizer used during training. It must provide
            ``word_index``, ``index_word`` and ``texts_to_sequences``, and its
            vocabulary must contain the ``startseq`` and ``endseq`` markers.
        feature_extractor: The feature extractor model. Its ``predict`` method
            receives a batch holding the single image, resized to 224x224 with
            pixel values divided by 255.
        image_path: Path to the input image.
        max_length: Maximum length of the caption. Used both as the padded
            length of the text input and as the cap on decoding steps.

    Returns:
        str: Generated caption with the start/end markers removed.

    Raises:
        KeyError: If ``startseq`` or ``endseq`` is missing from
            ``tokenizer.word_index``. This is checked before any model is run,
            so a mismatched tokenizer fails immediately instead of after a full
            decoding pass.
    """
    # Resolve the marker ids once, up front: they are loop-invariant, and a
    # tokenizer without them should fail before the expensive inference below.
    special_ids = (tokenizer.word_index['startseq'], tokenizer.word_index['endseq'])

    # Step 1: Extract image features using the feature extractor
    img = load_img(image_path, target_size=(224, 224))  # Load and resize the image
    img = img_to_array(img) / 255.0  # Convert to array and normalize
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    feature = feature_extractor.predict(img, verbose=0)  # Extract features

    # Step 2: Generate the caption one word at a time, feeding the words
    # produced so far back into the caption model.
    sequence = tokenizer.texts_to_sequences(["startseq"])[0]  # Start with the start token
    for _ in range(max_length):
        padded_sequence = pad_sequences([sequence], maxlen=max_length)  # Pad the sequence
        y_pred = caption_model.predict([feature, padded_sequence], verbose=0)  # Predict the next word
        # Cast to a plain int so the sequence never holds NumPy scalar types.
        next_word_index = int(np.argmax(y_pred))
        next_word = tokenizer.index_word.get(next_word_index)  # Map index to word

        # Stop on the end marker, or on an index with no word (e.g. padding index 0).
        if next_word is None or next_word == "endseq":
            break

        sequence.append(next_word_index)  # Add word index to sequence

    # Drop padding and the start/end markers, then join the remaining words.
    words = [tokenizer.index_word[idx] for idx in sequence
             if idx > 0 and idx not in special_ids]
    return " ".join(words)

In [35]:
# Example: caption a single image with the loaded models and tokenizer.
image_file = 'race.jpg'  # Replace with your image path
max_length = 34  # passed through as generate_caption's max_length
predicted_caption = generate_caption(
    caption_model=model,
    tokenizer=tokenizer,
    feature_extractor=fe,
    image_path=image_file,
    max_length=max_length,
)
print("Predicted Caption:", predicted_caption)

Predicted Caption: car is driving on the track


In [73]:
# Example: caption a single image with the loaded models and tokenizer.
image_file = r'static/images/surf.jpg'  # Replace with your image path
max_length = 34  # passed through as generate_caption's max_length
predicted_caption = generate_caption(
    caption_model=model,
    tokenizer=tokenizer,
    feature_extractor=fe,
    image_path=image_file,
    max_length=max_length,
)
print("Predicted Caption:", predicted_caption)

Predicted Caption: surfer in wetsuit is surfing


In [43]:
# Example: caption a single image with the loaded models and tokenizer.
image_file = 'c.jpg'  # Replace with your image path
max_length = 34  # passed through as generate_caption's max_length
predicted_caption = generate_caption(
    caption_model=model,
    tokenizer=tokenizer,
    feature_extractor=fe,
    image_path=image_file,
    max_length=max_length,
)
print("Predicted Caption:", predicted_caption)

Predicted Caption: tennis player is playing tennis


In [47]:
# Example: caption a single image with the loaded models and tokenizer.
image_file = 'bird.jpg'  # Replace with your image path
max_length = 34  # passed through as generate_caption's max_length
predicted_caption = generate_caption(
    caption_model=model,
    tokenizer=tokenizer,
    feature_extractor=fe,
    image_path=image_file,
    max_length=max_length,
)
print("Predicted Caption:", predicted_caption)

Predicted Caption: the bird is flying through the air


In [49]:
# Example: caption a single image with the loaded models and tokenizer.
image_file = 'bird2.webp'  # Replace with your image path
max_length = 34  # passed through as generate_caption's max_length
predicted_caption = generate_caption(
    caption_model=model,
    tokenizer=tokenizer,
    feature_extractor=fe,
    image_path=image_file,
    max_length=max_length,
)
print("Predicted Caption:", predicted_caption)

Predicted Caption: black dog is flying through the air


In [59]:
# Example: caption a single image with the loaded models and tokenizer.
image_file = 'cat.jpg'  # Replace with your image path
max_length = 34  # passed through as generate_caption's max_length
predicted_caption = generate_caption(
    caption_model=model,
    tokenizer=tokenizer,
    feature_extractor=fe,
    image_path=image_file,
    max_length=max_length,
)
print("Predicted Caption:", predicted_caption)

Predicted Caption: dog is running through the grass


In [61]:
# Example: caption a single image with the loaded models and tokenizer.
image_file = 'dc.jpg'  # Replace with your image path
max_length = 34  # passed through as generate_caption's max_length
predicted_caption = generate_caption(
    caption_model=model,
    tokenizer=tokenizer,
    feature_extractor=fe,
    image_path=image_file,
    max_length=max_length,
)
print("Predicted Caption:", predicted_caption)

Predicted Caption: dog is running through the grass
