In [1]:
!pip install transformers accelerate bitsandbytes torch pillow tqdm

Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0


In [None]:
import time
import json
from PIL import Image
from transformers import BitsAndBytesConfig, InstructBlipProcessor, InstructBlipForConditionalGeneration

# === Load InstructBLIP (Vicuna-7B) model and processor ===
# Single source of truth for the checkpoint, so the processor and the model
# weights are always loaded from the same repository.
MODEL_ID = "Salesforce/instructblip-vicuna-7b"

# Request 8-bit weight loading through bitsandbytes.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
blip_processor = InstructBlipProcessor.from_pretrained(MODEL_ID)
blip_model = InstructBlipForConditionalGeneration.from_pretrained(
    MODEL_ID,
    device_map="auto",  # automatic device placement of the model's modules
    quantization_config=bnb_config,
)

# === Instruction text sent to the model with every image ===
# Built from three fragments joined by single spaces.
prompt = " ".join(
    [
        "Describe the image with a focus on the intricate details of the object,",
        "including their color, shape, and number. Include any physical aspects that",
        "appear unusual or incorrect according to general knowledge.",
    ]
)

# === Function to get caption ===
def get_blip2_caption(img_path, *, max_new_tokens=100):
    """Caption one image with the loaded InstructBLIP model and shared prompt.

    Args:
        img_path: Path (or file object) accepted by ``PIL.Image.open``.
        max_new_tokens: Upper bound on the number of tokens generated for the
            caption. Unlike ``max_length``, this budget does not shrink as the
            instruction prompt gets longer.

    Returns:
        The decoded caption with special tokens removed and surrounding
        whitespace stripped. If the decoded text starts with the instruction
        prompt, that prefix is removed as well.

    Raises:
        OSError: Propagated from ``PIL.Image.open`` when the file cannot be
            opened or read as an image (includes ``FileNotFoundError``).
    """
    # The context manager releases the file handle right away; convert()
    # returns an independent in-memory RGB copy that outlives the `with`.
    with Image.open(img_path) as raw_image:
        img = raw_image.convert("RGB")

    # NOTE(review): with an 8-bit / half-precision model the pixel values may
    # need casting to the model's floating dtype as well — confirm against the
    # InstructBLIP usage docs for the installed transformers version.
    inputs = blip_processor(images=img, text=prompt, return_tensors="pt").to(
        blip_model.device
    )

    # Greedy decoding (do_sample=False) keeps captions deterministic.
    out = blip_model.generate(
        **inputs, max_new_tokens=max_new_tokens, do_sample=False
    )

    # Strip first so leading whitespace cannot defeat the prefix check below.
    caption = blip_processor.decode(out[0], skip_special_tokens=True).strip()
    if caption.startswith(prompt):
        caption = caption[len(prompt):].strip()
    return caption

# === Five test images: 1.jpg through 5.jpg in the Drive image folder ===
image_paths = [
    f"/content/drive/MyDrive/Img_folder/{index}.jpg" for index in range(1, 6)
]

# === Throughput Test ===
# Captions every path in `image_paths` sequentially and reports how many
# images per second were captioned successfully.
total_images = len(image_paths)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
successful = 0

for i, img_path in enumerate(image_paths):
    # Use the real list length so the progress line stays correct when the
    # list of images is edited.
    print(f"Processing {i+1}/{total_images}: {img_path}")
    try:
        caption = get_blip2_caption(img_path)
        print(f"  Caption: {caption}")
        successful += 1
    except Exception as e:
        # Best-effort benchmark: report the failure and continue with the
        # remaining images instead of aborting the whole run.
        print(f"  Failed: {e}")

end_time = time.perf_counter()
# Wall-clock time for the whole loop, including any failed attempts and the
# progress printing.
total_time = end_time - start_time

# === Results ===
print("\n--- THROUGHPUT REPORT ---")
print(f"Total time for {successful} images: {total_time:.2f} seconds")
if successful < total_images:
    # Make it visible that the total above also covers failed attempts.
    print(f"Failed images: {total_images - successful} (their time is included above)")
if successful > 0:
    print(f"Throughput: {successful / total_time:.2f} images per second")
else:
    print("No images were successfully captioned.")
