# Image Caption Generation for Text-to-Image Fine-Tuning

In [None]:
import os
import pandas as pd
from PIL import Image

# Load the BLIP Model for Image Captioning

In [None]:
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration

# Single place to change the checkpoint used for both the processor and model.
MODEL_ID = "Salesforce/blip-image-captioning-large"

# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The processor prepares images as model inputs and decodes generated token
# ids back to text; the model produces the caption tokens.
processor = BlipProcessor.from_pretrained(MODEL_ID)
model = BlipForConditionalGeneration.from_pretrained(MODEL_ID).to(device)


# Function to Generate Captions for each Image in a folder

In [None]:
def generate_captions(folder_path):
    """Caption every image in ``folder_path`` with the BLIP model.

    Uses the module-level ``processor`` and ``model`` loaded in the cell
    above rather than loading a second copy of the checkpoint on each call.

    Args:
        folder_path: Directory containing the images. Only files whose name
            ends in ``.jpg``, ``.jpeg`` or ``.png`` (case-insensitive) are
            captioned; sub-directories are not searched.

    Returns:
        A ``pandas.DataFrame`` with the columns ``file_name`` and ``text``,
        one row per captioned image, ordered by file name. When the folder
        holds no matching images the frame is empty but still has both
        columns.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')

    # Sort for a reproducible row order; os.listdir returns names in
    # arbitrary order.
    image_names = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(image_extensions)
    )

    records = []
    for name in image_names:
        image_path = os.path.join(folder_path, name)

        # The context manager closes the file handle once convert() has
        # produced its own in-memory copy of the pixels.
        with Image.open(image_path) as img:
            raw_image = img.convert('RGB')

        # Move the inputs to whichever device the model lives on instead of
        # assuming CUDA.
        inputs = processor(raw_image, return_tensors='pt').to(model.device)
        out = model.generate(**inputs)
        caption = processor.decode(out[0], skip_special_tokens=True)

        records.append({'file_name': name, 'text': caption})

    # Build the frame once; concatenating inside the loop re-copies every
    # previous row on each iteration.
    return pd.DataFrame(records, columns=['file_name', 'text'])



In [None]:
# Caption the images in the folder; the result is a DataFrame with one row
# per captioned image ('file_name' and 'text' columns).
image_captions = generate_captions('PATH_TO_IMAGES_FOLDER')  # Replace with the path to your folder of images.

In [None]:
# Display the captions table to spot-check the generated text before saving.
image_captions

# Save the captions to a metadata.csv file in the same folder as the images.

In [None]:
# index=False keeps the pandas row index out of the file, so the CSV holds
# only the 'file_name' and 'text' columns. Use the same folder path that was
# passed to generate_captions so the file sits alongside the images.
image_captions.to_csv('PATH_TO_IMAGES_FOLDER/metadata.csv', index=False)

# Create an "ImageFolder" Dataset and push it to HuggingFace Hub.

In [None]:
from datasets import load_dataset

# Build a dataset from the images folder with the 'imagefolder' loader,
# requesting the 'train' split.
# NOTE(review): presumably the loader reads metadata.csv from the same folder
# to attach the captions to each image — confirm against the datasets docs.
dataset = load_dataset('imagefolder', data_dir = 'PATH_TO_IMAGES_FOLDER', split='train')

In [None]:
import os

from huggingface_hub import login

# Read the access token from the HF_TOKEN environment variable instead of
# pasting it into the notebook, where it would be saved (and possibly shared
# or committed) together with the file. When the variable is unset, token is
# None and login() asks for the token interactively.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
# Upload the dataset to the Hugging Face Hub; private=True requests a private
# repository. Replace 'PATH/NAME_OF_DATASET' with your own repository id
# (presumably '<user-or-org>/<dataset-name>' — confirm against the Hub docs).
dataset.push_to_hub('PATH/NAME_OF_DATASET', private=True)