In [6]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import gradio as gr

In [7]:
# Load the BLIP preprocessor and captioning model from the same Hugging Face
# checkpoint; naming the checkpoint once keeps the two from drifting apart.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

In [8]:
def gen_caption(image):
    """Generate a text caption for a single image.

    Args:
        image: The image to caption, passed to the module-level ``processor``
            as ``images=``. The Gradio UI in this notebook supplies a PIL image.

    Returns:
        str: The decoded text of the first generated sequence, with special
        tokens removed.
    """
    # Preprocess into PyTorch tensors, as requested by return_tensors="pt".
    inputs = processor(images=image, return_tensors="pt")
    outputs = model.generate(**inputs)
    # The keyword is `skip_special_tokens` (plural). The previous spelling,
    # `skip_special_token`, is not the documented parameter, so special
    # tokens were not being stripped from the decoded caption.
    caption = processor.decode(outputs[0], skip_special_tokens=True)
    return caption

In [9]:
def caption_img(image):
    """Gradio callback: caption ``image`` and never raise into the UI.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        str: The generated caption, a prompt to upload an image when
        ``image`` is ``None``, or a readable error message if captioning
        fails.
    """
    # Guard the "submit with no upload" case explicitly so the user gets a
    # clear instruction rather than a low-level exception message.
    if image is None:
        return "Please upload an image first."
    try:
        return gen_caption(image)
    except Exception as e:
        # Deliberately broad: any failure is reported as text in the output
        # box instead of breaking the interface.
        return f"An error occurred: {e}"

In [10]:
# The input component hands the uploaded picture to the callback as a PIL image.
image_input = gr.Image(type="pil")

# Wire the captioning callback into a simple image-in / text-out interface.
ui = gr.Interface(
    fn=caption_img,
    inputs=image_input,
    outputs="text",
    title="Image Captioning",
    description="Upload an image to generate caption.",
)

# Serve the app on localhost.
ui.launch(server_name="localhost")

* Running on local URL:  http://localhost:7862
* To create a public link, set `share=True` in `launch()`.


