In [1]:
from PIL import Image
import requests
from transformers import BlipProcessor, BlipForConditionalGeneration

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pre-trained BLIP captioning processor and model.
# Both are created from one checkpoint identifier so the preprocessing
# configuration and the model weights cannot drift apart when the
# checkpoint is changed.
MODEL_ID = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(MODEL_ID)
model = BlipForConditionalGeneration.from_pretrained(MODEL_ID)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [10]:
import os
def generate_caption(image_path):
    """Generate a caption for an image file using the module-level BLIP model.

    Relies on the module-level ``processor`` and ``model`` objects.

    Args:
        image_path: Path to an image file on disk.

    Returns:
        A ``(caption, raw_image)`` tuple: the decoded caption string and the
        RGB-converted PIL image that was passed to the processor.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing regular file.
    """
    # Fail early with an explicit message when the path is missing or is
    # not a regular file.
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    # convert() returns an independent, fully loaded copy, so the underlying
    # file handle can be released as soon as the conversion is done. Without
    # the context manager the handle may stay open (e.g. multi-frame images).
    with Image.open(image_path) as source_image:
        raw_image = source_image.convert('RGB')

    # Preprocess into PyTorch tensors and place them on the same device as
    # the model so the call keeps working if the model is moved off the CPU.
    inputs = processor(raw_image, return_tensors="pt").to(model.device)

    # Generate token ids and decode the first (only) sequence to text.
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    return caption, raw_image

In [11]:
if __name__ == "__main__":
    # Replace with your own image path
    image_path = "grasshopper.jpeg"  # Make sure this image is in the same folder or provide full path

    try:
        caption, image = generate_caption(image_path)
    except OSError as e:
        # Only file-level problems are reported as a short message: the
        # FileNotFoundError raised by generate_caption is an OSError, as are
        # Pillow's errors for unreadable image files. Anything else (model or
        # programming errors) propagates with its full traceback.
        print(" Error:", str(e))
    else:
        print("Generated Caption:")
        print(caption)
        # Opens the image in the system's default external viewer.
        image.show()

Generated Caption:
a grasshopper on a white background
