In [9]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np

In [10]:
# Load VGG16 with ImageNet weights and without its fully-connected
# classification head (include_top=False), so predict() yields convolutional
# feature maps rather than class probabilities.
# NOTE: the name `model` is rebound to the BLIP captioning model in a later
# cell; re-run this cell before calling extract_features after that point.
model = VGG16(weights="imagenet", include_top=False)

In [11]:
def extract_features(image_path, target_size=(224, 224), feature_model=None):
    """Return a flat CNN feature vector for a single image file.

    The image is loaded and resized, converted to an array, given a leading
    batch axis, run through the VGG16 ``preprocess_input`` function and then
    through the feature model; the prediction is flattened to 1-D.

    Args:
        image_path: Path of the image file to load.
        target_size: ``(height, width)`` passed to ``load_img``. Defaults to
            ``(224, 224)``, for which the notebook's VGG16 model produced a
            vector of 25088 values.
        feature_model: Object exposing ``predict(batch)``. When ``None``
            (the default) the notebook-level global ``model`` is used, which
            preserves the original behavior.

    Returns:
        A 1-D array holding the flattened prediction for the image.

    Raises:
        AttributeError: If the resolved model has no ``predict`` method.
    """
    if feature_model is None:
        # Resolved at call time: the global `model` is rebound to the BLIP
        # captioning model later in the notebook, so pass `feature_model`
        # explicitly when calling this after that cell has run.
        feature_model = model
    if not hasattr(feature_model, "predict"):
        # Fail before doing any image I/O, with a message that points at the
        # likely cause instead of a bare attribute lookup failure.
        raise AttributeError(
            f"{type(feature_model).__name__!r} object has no 'predict' method; "
            "the global `model` may have been rebound to a non-Keras model. "
            "Pass the VGG16 model via `feature_model`."
        )

    img = load_img(image_path, target_size=target_size)
    # The model expects a batch, so add a leading axis of length 1.
    batch = np.expand_dims(img_to_array(img), axis=0)
    batch = preprocess_input(batch)
    features = feature_model.predict(batch)
    return features.flatten()

In [None]:
# Smoke-test the extractor on one local image and report the vector length.
# NOTE(review): this is a machine-specific absolute path; consider a
# configurable data directory so the notebook runs elsewhere.
image_path = "C:/image cap/image1.jpeg"  # Replace with your image path
features = extract_features(image_path)
print("Feature Shape:", features.shape)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 730ms/step
Feature Shape: (25088,)


In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding

In [15]:
# Size of the token vocabulary: used as the Embedding input dimension and as
# the width of the final softmax layer of caption_model.
vocab_size = 5000
# Number of tokens per input sequence fed to caption_model.
max_length = 10

In [17]:
# Token-sequence model: embeds `max_length` token ids, runs them through two
# stacked LSTMs and predicts a probability distribution over the vocabulary.
caption_model = Sequential([
    # Declare the input shape explicitly instead of via Embedding's
    # `input_length` argument, which Keras 3 deprecates and ignores (leaving
    # the model unbuilt, so summary() cannot report shapes or parameters).
    tf.keras.Input(shape=(max_length,), dtype="int32"),
    Embedding(vocab_size, 256),
    # return_sequences=True so the second LSTM receives the full sequence.
    LSTM(256, return_sequences=True),
    LSTM(256),
    Dense(vocab_size, activation="softmax"),
])



In [18]:
# Configure training with the Adam optimizer and print the layer table.
# NOTE(review): categorical_crossentropy expects one-hot targets; if the
# training targets are integer token ids, sparse_categorical_crossentropy is
# the matching loss -- confirm against the training data (not shown here).
caption_model.compile(loss="categorical_crossentropy", optimizer="adam")
caption_model.summary()

In [None]:
import nltk
# Fetch the Punkt tokenizer data that nltk.word_tokenize relies on.
# NOTE(review): recent NLTK releases look for the "punkt_tab" resource
# instead -- confirm which one the installed version needs.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [20]:
# Reference captions keyed by image path; each value is a list of raw
# caption strings for that image.
captions = {
    "C:/image cap/image1.jpeg": ["Six people", "Six Directions of life"]
}

In [21]:
def preprocess_text(text):
    """Lower-case ``text``, tokenize it with NLTK and re-join with spaces.

    Args:
        text: Raw caption string.

    Returns:
        The tokens produced by ``nltk.word_tokenize`` on the lower-cased
        input, joined by single spaces.
    """
    lowered = text.lower()
    return " ".join(nltk.word_tokenize(lowered))

# Normalize every caption, writing the cleaned list back under the same key
# of the existing `captions` dict.
for key in list(captions):
    captions[key] = list(map(preprocess_text, captions[key]))

print(captions)

{'C:/image cap/image1.jpeg': ['six people', 'six directions of life']}


In [22]:
import random

In [24]:
def generate_caption(image_features):
    """Return a randomly chosen canned caption (placeholder predictor).

    ``image_features`` is accepted for interface purposes only and is never
    read. A later cell in this notebook defines another ``generate_caption``
    (BLIP-based, taking an image path) that rebinds this name.

    Args:
        image_features: Ignored.

    Returns:
        One of three fixed caption strings, picked with ``random.choice``.
    """
    placeholder_captions = (
        "A dog playing in the park",
        "A man riding a bike",
        "A cat sitting on a sofa",
    )
    picked = random.choice(placeholder_captions)
    return picked

# `features` is passed only to satisfy the signature; the placeholder
# generator above does not read it.
caption = generate_caption(features)
print("Generated Caption:", caption)

Generated Caption: A dog playing in the park


In [2]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image

In [3]:
# Load the BLIP base captioning checkpoint (downloaded on first use):
# `processor` prepares model inputs and decodes generated token ids,
# `model` generates the caption token ids.
# NOTE: this rebinds the global `model`, which extract_features (defined
# earlier) reads by default -- after this cell runs that name no longer
# refers to the VGG16 network.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

pytorch_model.bin:  68%|######7   | 671M/990M [00:00<?, ?B/s]

Error while downloading from https://cdn-lfs.hf.co/repos/f1/cb/f1cbe4cfb2a267026632ce513d5918162e03df2ee28456145ba8a1d25cf39aad/d6638651a5526cc2ede56f2b5104d6851b0755816d220e5e046870430180c767?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27pytorch_model.bin%3B+filename%3D%22pytorch_model.bin%22%3B&response-content-type=application%2Foctet-stream&Expires=1740236568&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0MDIzNjU2OH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9yZXBvcy9mMS9jYi9mMWNiZTRjZmIyYTI2NzAyNjYzMmNlNTEzZDU5MTgxNjJlMDNkZjJlZTI4NDU2MTQ1YmE4YTFkMjVjZjM5YWFkL2Q2NjM4NjUxYTU1MjZjYzJlZGU1NmYyYjUxMDRkNjg1MWIwNzU1ODE2ZDIyMGU1ZTA0Njg3MDQzMDE4MGM3Njc%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qJnJlc3BvbnNlLWNvbnRlbnQtdHlwZT0qIn1dfQ__&Signature=L8VnmWGWiQRekoujCffh6dLNphNZ89Y3JEKgL2YlQRoqUl5xZNYgqnRChONzQClWfk6U3D9PDqth167%7E7jpgnjIcIdcXWede0jM7CpNCQlmmO1tFhLkKecEF--My4aUTdqZT3RcV6-u1vRK0fKf95KMKxwcmt828HTo8et61aFIOx7AW9dg9K%7E

pytorch_model.bin:  83%|########2 | 818M/990M [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
def generate_caption(image_path):
    """Generate a caption for an image file with the BLIP model.

    Uses the notebook-level ``processor`` and ``model`` globals. This
    definition rebinds the name of the earlier placeholder
    ``generate_caption``, which took image features rather than a path.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The decoded caption string, with special tokens stripped.
    """
    # Open inside a context manager so the underlying file handle is released
    # deterministically; convert() returns an independent, fully loaded image
    # that stays usable after the file is closed.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    inputs = processor(image, return_tensors="pt")
    output = model.generate(**inputs)
    # generate() returns a batch; decode its first (only) sequence.
    return processor.decode(output[0], skip_special_tokens=True)

In [None]:
# Caption a sample image with the BLIP-based generate_caption.
# NOTE(review): "sample.jpg" is resolved relative to the kernel's working
# directory and differs from the image used in the earlier cells -- confirm
# the intended file.
caption = generate_caption("sample.jpg")
print("Generated Caption:", caption)