<a href="https://colab.research.google.com/github/sayanarajasekhar/GenerativeAiApplications/blob/main/Image_Captioning_BLIP_Model_Gradio_Interface.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. Creating a simple interface for an image captioning model

The BLIP (Bootstrapping Language-Image Pre-training) model is used to generate captions for images.

In [None]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import gradio as gr

# Both the processor and the model are loaded from the same pretrained
# checkpoint, so the identifier is named once and reused.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)

def generate_caption(image):
  '''
    Generate a caption for an image with the BLIP model.

    Args:
      image: PIL image object to caption.

    Returns:
      The caption decoded from the model output, with special
      tokens stripped.
  '''
  # Preprocess the image into PyTorch ("pt") tensors for the model.
  inputs = processor(images = image, return_tensors = "pt")
  outputs = model.generate(**inputs)
  # Decode the first sequence of the generated batch into text.
  caption = processor.decode(outputs[0], skip_special_tokens = True)
  # The caption must be returned explicitly; without this the function
  # yields None and the caller has nothing to display.
  return caption

def caption_image(image):
  '''
    Caption a PIL image, reporting failures as text.

    Delegates to generate_caption and returns its result. Any
    exception raised along the way is turned into an
    "An error occurred: ..." message instead of propagating.
  '''
  try:
    result = generate_caption(image)
  except Exception as err:
    # Surface the failure as the returned text rather than raising.
    return f"An error occurred: {err!s}"
  return result

# Image input component; type="pil" asks Gradio to hand the callback a
# PIL image, which is what caption_image expects.
image_input = gr.Image(type="pil")

# Single-function UI: one image in, one text caption out.
iface = gr.Interface(
    fn=caption_image,
    inputs=image_input,
    outputs="text",
    title="Image Captioning with BLIP",
    description="Upload an image to generate a caption.",
)

# share=False: do not request a public share link for the app.
iface.launch(share=False)