# Automated Image Captioning and Visual QnA Engine

## Library Imports

In [12]:
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration, BlipForQuestionAnswering
import gradio as gr
import torch
import warnings
warnings.filterwarnings("ignore")

## Image Captioning

In [13]:
# The processor and the model weights are both loaded from the same
# Hugging Face checkpoint, so the identifier is spelled out only once.
CAPTIONING_CHECKPOINT = "Salesforce/blip-image-captioning-base"

captioning_processor = BlipProcessor.from_pretrained(CAPTIONING_CHECKPOINT)
captioning_model = BlipForConditionalGeneration.from_pretrained(CAPTIONING_CHECKPOINT)

In [14]:
def caption(image):
    """Generate a caption for an image with the BLIP captioning model.

    Args:
        image: A PIL image. ``None`` is treated as "no image supplied"
            (e.g. the form was submitted without an upload).

    Returns:
        str: The decoded caption, with special tokens stripped.

    Raises:
        gr.Error: If ``image`` is ``None``. This gives the UI a readable
            message instead of an AttributeError from ``None.convert``.
    """
    if image is None:
        raise gr.Error("Please upload an image before requesting a caption.")

    # Convert to RGB before handing the image to the processor.
    rgb_image = image.convert("RGB")
    model_inputs = captioning_processor(rgb_image, return_tensors="pt")

    generated_ids = captioning_model.generate(**model_inputs)
    # Only the first generated sequence is decoded (single-image input).
    return captioning_processor.decode(generated_ids[0], skip_special_tokens=True)


## Visual QnA

In [15]:
# The processor and the question-answering weights are both loaded from
# the same Hugging Face checkpoint, so the identifier is spelled out only once.
VQA_CHECKPOINT = "Salesforce/blip-vqa-base"

processor = BlipProcessor.from_pretrained(VQA_CHECKPOINT)
model = BlipForQuestionAnswering.from_pretrained(VQA_CHECKPOINT)

In [16]:
def qna(image, question):
    """Answer a free-text question about an image with the BLIP VQA model.

    Args:
        image: A PIL image. ``None`` is treated as "no image supplied".
        question: The question to ask about the image.

    Returns:
        str: The decoded answer, with special tokens stripped.

    Raises:
        gr.Error: If ``image`` is ``None`` or ``question`` is empty or
            whitespace-only, so the UI shows a readable message instead
            of an opaque failure or an answer to no question at all.
    """
    if image is None:
        raise gr.Error("Please upload an image before asking a question.")
    if question is None or not question.strip():
        raise gr.Error("Please enter a question about the image.")

    # Convert to RGB before handing the image to the processor.
    rgb_image = image.convert("RGB")
    model_inputs = processor(rgb_image, question, return_tensors="pt")

    generated_ids = model.generate(**model_inputs)
    # Only the first generated sequence is decoded (single image/question pair).
    return processor.decode(generated_ids[0], skip_special_tokens=True)

## Gradio Interface

In [17]:
# Heading for the combined app; passed as `title=` to gr.TabbedInterface below.
title = "Automated Image Captioning and Visual QnA Engine"

In [20]:
# Captioning tab: one PIL image in, one caption string out.
# Components come from the top-level `gr` namespace; the legacy
# `gr.inputs` / `gr.outputs` modules are deprecated in Gradio 3.x and
# removed in Gradio 4.
interface1 = gr.Interface(
    fn=caption,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(label="Generated  Caption by BLIP"),
    description="BLIP Image Captioning",
)

In [21]:
# Visual QnA tab: a PIL image plus a free-text question in, one answer out.
# Components come from the top-level `gr` namespace; the legacy
# `gr.inputs` / `gr.outputs` modules are deprecated in Gradio 3.x and
# removed in Gradio 4.
interface2 = gr.Interface(
    fn=qna,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Answer generated by BLIP"),
    description="BLIP Visual Question Answering of Images",
)



In [22]:
# Combine both demos into a single app, one tab per interface, then serve it.
final_interface = gr.TabbedInterface(
    [interface1, interface2],
    ["Captioning", "Visual QnA"],  # tab labels, in the same order as the interfaces
    title=title,
    theme=gr.themes.Default(),
)
final_interface.launch(inbrowser=True)



Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




