<a href="https://colab.research.google.com/github/lusifer468/CodSoft_Internship/blob/main/Task_no_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
pip install tensorflow numpy



In [15]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, Attention

# Load a pre-trained VGG16 model without the top (fully connected) layers
base_model = VGG16(weights='imagenet')
base_model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc2').output)

# Define the image feature extraction model
def extract_image_features(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = preprocess_input(img_array)
    img_array = tf.expand_dims(img_array, 0)  # Expand the batch dimension
    features = base_model(img_array)
    return tf.squeeze(features, axis=0)  # Remove the batch dimension

# Define the captioning model
def build_captioning_model(vocab_size, max_length):
    # Image input
    image_input = Input(shape=(4096,))

    # Partial caption input
    caption_input = Input(shape=(max_length,))

    # Caption embedding
    caption_embedding = Embedding(input_dim=vocab_size, output_dim=256, input_length=max_length)(caption_input)

    # Attention mechanism
    attention = Attention()([image_input, caption_embedding])

    # Concatenate image features and caption embedding
    merged = tf.concat([image_input, attention], axis=-1)

    # LSTM layer
    lstm = LSTM(256)(merged)
    lstm = Dropout(0.5)(lstm)

    # Output layer
    output = Dense(vocab_size, activation='softmax')(lstm)

    # Build the model
    model = Model(inputs=[image_input, caption_input], outputs=output)

    return model

# Function to generate captions for an image
def generate_caption(image_path, tokenizer, model, max_length):
    # Extract image features
    image_features = extract_image_features(image_path)

    # Seed the generation process with a start token
    input_sequence = tokenizer.texts_to_sequences(['startseq'])[0]
    input_sequence = pad_sequences([input_sequence], maxlen=max_length)

    # Generate the caption word by word
    caption = []
    for _ in range(max_length):
        yhat = model.predict([image_features, input_sequence], verbose=0)
        yhat = tf.argmax(yhat, axis=-1)
        word = tokenizer.index_word[int(yhat)]
        if word is None:
            break
        input_sequence = pad_sequences([input_sequence[0].tolist() + [int(yhat)]], maxlen=max_length)
        if word == 'endseq':
            break
        caption.append(word)

    return ' '.join(caption)

# Example usage
image_path = 'path/to/your/image.jpg'  # Replace with the path to your image
vocab_size = 10000  # Adjust based on your dataset
max_length = 20  # Adjust based on your dataset

# Create a simple tokenizer (replace this with your own dataset-specific tokenization)
tokenizer = Tokenizer(num_words=vocab_size, oov_token='<OOV>')
tokenizer.fit_on_texts(['startseq', 'endseq'])  # Add special tokens for start and end

# Build and compile the model
model = build_captioning_model(vocab_size, max_length)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Generate a caption for the image
caption = generate_caption(image_path, tokenizer, model, max_length)
print("Generated Caption:", caption)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5


ValueError: ignored