In [1]:
# Import required libraries
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
# Silence TensorFlow INFO/WARNING log output (must be set before importing tensorflow; the program itself uses PyTorch)
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
import tensorflow as tf
# Suppress all Python warnings (this also hides warnings raised by Hugging Face libraries)
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the BLIP captioning processor and model from Hugging Face.
# Both are loaded from the same checkpoint identifier.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

In [3]:
def generate_caption(image):
    """Produce a text caption for an image with the notebook-level BLIP objects.

    Args:
        image: The image to caption. It is handed unchanged to ``processor``;
            the Gradio interface in this notebook supplies a PIL image.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped.
    """
    # Preprocess the image into PyTorch tensors ("pt") for the model.
    model_inputs = processor(images=image, return_tensors="pt")
    generated_ids = model.generate(**model_inputs)
    # Only the first generated sequence is decoded.
    return processor.decode(generated_ids[0], skip_special_tokens=True)

In [4]:
# Gradio callback: receive the PIL image and return the caption (or an error message)
def caption_image(image):
    """Caption an uploaded image, reporting any failure as text.

    Args:
        image: The PIL image supplied by the Gradio image input, or ``None``
            when the form is submitted without an image.

    Returns:
        The generated caption on success; otherwise a human-readable message
        saying that no image was provided or describing the exception that
        occurred while captioning.
    """
    # Gradio passes None when nothing was uploaded. Answer with a clear prompt
    # instead of surfacing an internal processor exception to the user.
    if image is None:
        return "No image provided. Please upload an image."
    try:
        return generate_caption(image)
    except Exception as e:
        # Deliberate best-effort: show the failure in the text output rather
        # than letting the exception propagate.
        return f"An error occurred: {str(e)}"

In [5]:
# Build the web UI: one image input, delivered to the callback as a PIL image,
# and a plain-text output that shows the caption.
image_input = gr.Image(type="pil")
iface = gr.Interface(
    fn=caption_image,
    inputs=image_input,
    outputs="text",
    title="Image Captioning with BLIP",
    description="Upload an image to generate a caption.",
)

In [6]:
# Start the Gradio server for the interface; with default arguments it is served
# on a local URL only (pass share=True to launch() for a public link).
iface.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [7]:
# Stop the Gradio server that was launched earlier in the notebook.
iface.close()

Closing server running on port: 7860
