# BLIP
BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation


## Environment


In [None]:
%pip install -q transformers

import os
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image

MODEL_PATH = 'Salesforce/blip-image-captioning-large' #@param {type:'string'}

# Both the processor (encodes images, decodes generated tokens) and the
# captioning model are loaded from the same checkpoint; the model is then
# moved onto the GPU.
processor = BlipProcessor.from_pretrained(MODEL_PATH)
model = BlipForConditionalGeneration.from_pretrained(MODEL_PATH)
model = model.to("cuda")

def generate_caption(image_path: str) -> str:
    """Generate a caption for the image stored at ``image_path``.

    The image is converted to RGB, encoded by the module-level ``processor``,
    passed to the module-level ``model`` without any text prompt, and the
    first generated sequence is decoded back to text.

    Args:
        image_path: Path to an image file that PIL can open.

    Returns:
        The decoded caption with special tokens stripped.
    """
    # The context manager releases the file handle as soon as the pixels have
    # been copied into the RGB image, instead of leaving it to the garbage
    # collector (which matters when captioning many files in a loop).
    with Image.open(image_path) as source:
        image = source.convert('RGB')

    # Send the inputs to whatever device the model lives on rather than
    # hard-coding it, so the two can never disagree.
    inputs = processor(image, return_tensors='pt').to(model.device)
    result = model.generate(**inputs)
    return processor.decode(result[0], skip_special_tokens=True)


## Unconditional Image Captioning


### Single Image Captioning

In [None]:
INPUT_IMAGE = '/content/input.jpg' #@param {type:'string'}
# Caption the single image selected in the form above and display the result.
single_caption = generate_caption(INPUT_IMAGE)
print(single_caption)

### Batch Image Captioning

In [None]:
INPUT_DIR = '/content/input' #@param {type:'string'}
RENAME_INPUT_FILE = True #@param {type:'boolean'}
EXPORT_TEXT_FILE = False #@param {type:'boolean'}

# Caption every .jpg/.png file directly inside INPUT_DIR. Depending on the
# flags above, each image is renamed to its caption and/or gets a sidecar
# .txt file (same base name as the image) containing the caption.
IMAGE_EXTENSIONS = ('.jpg', '.png')

# sorted() keeps the processing order, and therefore any collision suffixes,
# deterministic across runs.
for item in sorted(os.listdir(INPUT_DIR)):
    filename, extension = os.path.splitext(item)
    # Compare case-insensitively so files such as "IMG_001.JPG" are not skipped.
    if extension.lower() not in IMAGE_EXTENSIONS:
        continue
    filepath = os.path.join(INPUT_DIR, item)
    caption = generate_caption(filepath)

    # Base name (without extension) the image ends up with; the optional
    # text file reuses it.
    stem = filename
    if RENAME_INPUT_FILE:
        # A path separator inside the caption would make os.rename target a
        # different directory; an empty caption falls back to the old name.
        safe_caption = caption.replace('/', '_').replace(os.sep, '_').strip() or filename
        stem = safe_caption
        target = os.path.join(INPUT_DIR, f'{stem}{extension}')
        # Several images can receive the same caption. os.rename would
        # silently replace the earlier file, so append a numeric suffix until
        # the name is free (or is the file's own current name).
        duplicate = 1
        while target != filepath and os.path.exists(target):
            duplicate += 1
            stem = f'{safe_caption}_{duplicate}'
            target = os.path.join(INPUT_DIR, f'{stem}{extension}')
        if target != filepath:
            os.rename(filepath, target)

    if EXPORT_TEXT_FILE:
        text_path = os.path.join(INPUT_DIR, f'{stem}.txt')
        # Explicit encoding so the output does not depend on the platform default.
        with open(text_path, 'w', encoding='utf-8') as file:
            file.write(caption)
