# 📷 Image-to-Text Generator Demo

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/tharun-ship-it/image-to-text-generator/blob/main/notebooks/demo.ipynb)

**Author:** Tharun Ponnam

**Model:** Salesforce/blip-image-captioning-large

In [None]:
!pip install torch transformers pillow -q

In [None]:
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Print the installed PyTorch version and whether CUDA is available.
print(f'PyTorch: {torch.__version__}')
print(f'CUDA: {torch.cuda.is_available()}')

In [None]:
# Choose the compute device: CUDA when available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the BLIP processor and captioning model from the same checkpoint,
# move the model to the chosen device and switch it to evaluation mode.
model_name = 'Salesforce/blip-image-captioning-large'
processor = BlipProcessor.from_pretrained(model_name)
model = (
    BlipForConditionalGeneration.from_pretrained(model_name)
    .to(device)
    .eval()
)
print(f'Model loaded on {device}')

In [None]:
def generate_caption(image, prompt=None, *, max_length: int = 50, num_beams: int = 5) -> str:
    """Generate a text caption for an image with the loaded BLIP model.

    Uses the notebook-level ``processor``, ``model`` and ``device`` objects,
    which must already exist when this function is called.

    Args:
        image: A PIL image. It is converted to RGB when its mode is not
            already ``'RGB'``.
        prompt: Optional text handed to the processor together with the
            image. Falsy values (``None`` or ``''``) mean the processor
            receives the image only.
        max_length: Forwarded unchanged to ``model.generate`` as
            ``max_length``. NOTE(review): when a prompt is given this limit
            presumably includes the prompt tokens -- confirm against the
            ``generate`` documentation.
        num_beams: Forwarded unchanged to ``model.generate`` as
            ``num_beams``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Build the processor call once; the text argument is only supplied
    # when a non-empty prompt was given, so the prompt-less call is
    # exactly the image-only call.
    processor_kwargs = {'images': image, 'return_tensors': 'pt'}
    if prompt:
        processor_kwargs['text'] = prompt
    inputs = processor(**processor_kwargs).to(device)

    # Gradients are not needed for generation.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_length=max_length,
            num_beams=num_beams,
        )
    return processor.decode(output[0], skip_special_tokens=True)

In [None]:
# Test with URL
from io import BytesIO
from urllib.request import urlopen
import matplotlib.pyplot as plt

url = 'https://images.unsplash.com/photo-1514888286974-6c03e2ca1dba?w=400'

# Download the whole image inside a context manager so the HTTP response is
# always closed, and use a timeout so a stalled download cannot hang the cell.
# The bytes are wrapped in an in-memory buffer so the image can still be
# read after the response has been closed.
with urlopen(url, timeout=30) as response:
    image = Image.open(BytesIO(response.read()))

# Show the downloaded image without axes.
plt.figure(figsize=(8, 6))
plt.imshow(image)
plt.axis('off')
plt.show()

caption = generate_caption(image)
print(f'Caption: {caption}')