In [None]:
pip install transformers pandas pillow



In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at a fixed mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import os
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import pandas as pd

# Load the BLIP captioning model and its matching pre/post-processor.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Folder containing the images, and the file the caption table is written to.
image_folder = "/content/drive/MyDrive/Trial_dataset"
output_csv = "annotations.csv"

# Extensions treated as images. They are matched against the lower-cased file
# name so that files such as "photo.JPG" or "scan.PNG" are not silently skipped.
image_extensions = ('.png', '.jpg', '.jpeg')

# One {"filename", "description"} record per captioned image.
results = []

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the resulting CSV identical from run to run.
for img_file in sorted(os.listdir(image_folder)):
    if not img_file.lower().endswith(image_extensions):
        continue

    img_path = os.path.join(image_folder, img_file)

    # Open inside a context manager so the file handle is released right away;
    # convert() hands back a separate in-memory image that outlives the handle.
    with Image.open(img_path) as raw_image:
        image = raw_image.convert("RGB")

    # Generate caption: preprocess -> generate token ids -> decode to text.
    inputs = processor(images=image, return_tensors="pt")
    outputs = model.generate(**inputs)
    caption = processor.decode(outputs[0], skip_special_tokens=True)

    # Only the bare file name (not the full path) is recorded here.
    results.append({"filename": img_file, "description": caption})

# Save to CSV. Naming the columns explicitly keeps the header row present even
# when the folder contained no matching images.
df = pd.DataFrame(results, columns=["filename", "description"])
df.to_csv(output_csv, index=False)

print(f"Descriptions saved to {output_csv}")



Descriptions saved to annotations.csv


In [None]:
# Derive a templated question from each caption and store it on the same
# record; the dicts inside `results` are updated in place.
for record in results:
    caption_text = record["description"]
    record.update(query=f"What is in the image described as: '{caption_text}'?")

In [None]:
from multiprocessing import Pool

def process_image(img_file):
    """Caption a single image and build the question derived from that caption.

    Relies on the module-level ``processor`` and ``model`` objects.

    Args:
        img_file: Path of the image to open; it is stored unchanged under the
            ``"filename"`` key of the result.

    Returns:
        dict with the keys ``"filename"``, ``"description"`` (the decoded
        caption) and ``"query"`` (a question that embeds the caption).
    """
    rgb_image = Image.open(img_file).convert("RGB")

    # Preprocess the image, generate token ids, and keep the first sequence.
    model_inputs = processor(images=rgb_image, return_tensors="pt")
    first_sequence = model.generate(**model_inputs)[0]
    description = processor.decode(first_sequence, skip_special_tokens=True)

    return {
        "filename": img_file,
        "description": description,
        "query": f"What is in the image described as: '{description}'?",
    }

# Path to images
image_folder = "/content/drive/MyDrive/Trial_dataset"

# Full paths of every image in the folder. The extension check is done on the
# lower-cased name so ".PNG"/".JPG" files are included, and the list is sorted
# because os.listdir() gives no ordering guarantee.
image_paths = sorted(
    os.path.join(image_folder, f)
    for f in os.listdir(image_folder)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# Process images in parallel. pool.map() returns results in the same order as
# `image_paths`, so the sorted order above carries through to the CSV.
# NOTE(review): each worker uses the module-level `processor`/`model`; starting
# worker processes after the model has been loaded may be fragile on some
# platforms/start methods — confirm on the target runtime.
with Pool(processes=4) as pool:  # Adjust process count based on system resources
    results = pool.map(process_image, image_paths)

# Save to CSV (`output_csv` is defined in the earlier captioning cell). Naming
# the columns keeps the header row even when no images were found.
df = pd.DataFrame(results, columns=["filename", "description", "query"])
df.to_csv(output_csv, index=False)


In [None]:
# Write the current annotation table to disk, leaving out the index column.
df.to_csv(path_or_buf="annotations.csv", index=False)

In [None]:
import json

# Turn the table into a list with one {column: value} dict per row.
data_dict = df.to_dict(orient="records")

# Serialise the records as 4-space-indented JSON and write them out.
json_text = json.dumps(data_dict, indent=4)
with open("annotations.json", mode="w") as json_file:
    json_file.write(json_text)

print("Annotations saved to annotations.json")


Annotations saved to annotations.json


In [None]:
# Read the exported file back in as a sanity check on what was written.
with open("annotations.json") as json_file:
    annotations = json.loads(json_file.read())

# Show only the first five records rather than the whole list.
first_five = annotations[:5]
print(first_five)

[{'filename': '/content/drive/MyDrive/Trial_dataset/118.png', 'description': 'a man in camouflage clothing holding a rifle', 'query': "What is in the image described as: 'a man in camouflage clothing holding a rifle'?"}, {'filename': '/content/drive/MyDrive/Trial_dataset/Automatic Rifle_16.png', 'description': 'a gun with a barrel and a barrel', 'query': "What is in the image described as: 'a gun with a barrel and a barrel'?"}, {'filename': '/content/drive/MyDrive/Trial_dataset/111.png', 'description': 'a soldier with a gun in his hand stock photo', 'query': "What is in the image described as: 'a soldier with a gun in his hand stock photo'?"}, {'filename': '/content/drive/MyDrive/Trial_dataset/112.png', 'description': 'a man in camouflage clothing holding a rifle', 'query': "What is in the image described as: 'a man in camouflage clothing holding a rifle'?"}, {'filename': '/content/drive/MyDrive/Trial_dataset/110.png', 'description': 'a man in a black suit and helmet with a gun', 'quer