In [6]:
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from PIL import Image
import gradio as gr

# Hugging Face Hub identifier of the BLIP captioning checkpoint to load.
model_name = "Salesforce/blip-image-captioning-base"

# Use the GPU when one is available, otherwise fall back to the CPU so that
# loading the model does not raise on a runtime without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The processor prepares images for the model and decodes generated token ids.
processor = BlipProcessor.from_pretrained(model_name)
model = BlipForConditionalGeneration.from_pretrained(model_name).to(device)

def generate_caption(image):
    """Return a caption generated by the BLIP model for *image*.

    Args:
        image: The image supplied by the Gradio image input, or ``None``
            when no image is present (for example after the input is
            cleared).

    Returns:
        The decoded caption with special tokens removed, or an empty string
        when *image* is ``None``.
    """
    # Nothing to caption: return an empty string instead of letting the
    # processor fail on a missing image.
    if image is None:
        return ""

    # Place the inputs on whatever device the model lives on rather than
    # assuming "cuda", so the two can never be on different devices.
    inputs = processor(images=image, return_tensors="pt").to(model.device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=50)

    return processor.decode(output[0], skip_special_tokens=True)

# Stylesheet passed to the Gradio interface: one rule per list entry, with a
# leading and trailing newline produced by the empty first/last entries.
# NOTE(review): no component in the visible Interface call sets elem_id
# "title" or "desc" - confirm these selectors actually match something.
custom_css = "\n".join(
    [
        "",
        "#title {color: #ffffff; text-align: center; font-size: 30px; font-weight: bold;}",
        "#desc {color: #ffffff; text-align: center; font-size: 18px;}",
        "",
    ]
)


# Assemble the Gradio UI: an image input wired to generate_caption, with the
# resulting caption shown in a labelled text box.
caption_output = gr.Textbox(label="Generated Caption")
interface_options = {
    "fn": generate_caption,
    "inputs": "image",
    "outputs": caption_output,
    "title": "📸 Image Captioning with BLIP",
    "description": "Upload an image, and the model will generate a caption.",
    "allow_flagging": "never",
    "live": True,
    "css": custom_css,
}
iface = gr.Interface(**interface_options)

# share=True asks Gradio for a public URL in addition to the local server.
iface.launch(share=True)




Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://73717efd1ee6841049.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


