In [1]:
import requests

import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection

# Hugging Face checkpoint used for zero-shot (text-prompted) object detection.
model_id = "IDEA-Research/grounding-dino-base"

processor = AutoProcessor.from_pretrained(model_id)
# Prefer the GPU, but fall back to CPU so the notebook still runs on machines
# without CUDA instead of failing in `.to(device)`.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(device)


2024-07-23 14:11:42.273831: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-23 14:11:42.289388: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-23 14:11:42.289412: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-23 14:11:42.299251: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
from pathlib import Path

def run_sam(image, text, threshold=0.35):
    """Detect objects matching a text prompt in one image.

    Uses the module-level ``processor``, ``model`` and ``device``. Despite the
    function name, the checkpoint loaded in this notebook is Grounding DINO
    (see ``model_id``).

    Args:
        image: Image handed to the processor. Must expose ``.size`` as
            ``(width, height)`` -- assumes a PIL image; TODO confirm for
            other callers.
        text: Text prompt describing what to look for (e.g. ``"a pen."``).
        threshold: Minimum detection score to keep. Defaults to 0.35, the
            value that used to be hard-coded here.

    Returns:
        A list of ``(box, conf)`` tuples, one per kept detection. ``box`` is a
        list of four floats rounded to one decimal (presumably
        ``x1, y1, x2, y2`` in pixels of the input image -- verify against the
        post-processor), and ``conf`` is the score rounded to two decimals.

    Side effects:
        Prints one "Detected ..." line per kept detection.
    """
    inputs = processor(images=image, text=text, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)

    # `image.size` is (width, height); the post-processor is given the
    # reversed tuple, i.e. (height, width), to rescale boxes to the image.
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.image_processor.post_process_object_detection(
        outputs, threshold=threshold, target_sizes=target_sizes
    )[0]

    box_list = []
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        box = [round(coord, 1) for coord in box.tolist()]
        conf = round(score.item(), 2)
        box_list.append((box, conf))
        # `label` is printed as the raw value returned by the post-processor.
        print(f"Detected {label.item()} with confidence {conf} at location {box}")
    return box_list

In [10]:
import ultralytics
from ultralytics.utils.plotting import Annotator
import cv2

# Text prompt for the detector: we are looking for a pen in every frame.
text = "a pen."

# Annotated copies are written here, one per input file (same file name).
plot_dir = Path("plot/detect")
plot_dir.mkdir(parents=True, exist_ok=True)

file_list = sorted(Path("plot/f1").glob("*jpg"))
print("episodes:", len(file_list))
# file_list = file_list[:10]  # uncomment for a quick run on the first few files

for file in file_list:
    print(file)
    # Open inside a context manager so the file handle is released, and force
    # RGB so the RGB->BGR conversion below also works for grayscale/CMYK JPEGs.
    with Image.open(file) as opened:
        image = opened.convert("RGB")

    boxes = run_sam(image, text)

    plot_path = plot_dir / file.name

    annotator = Annotator(image)
    for box, conf in boxes:
        # The bounding box coordinates (x1, y1, x2, y2)
        annotator.box_label(box, label=str(conf), color=(0, 204, 0))

    result = annotator.result()
    # The annotated result is treated as RGB; OpenCV writes BGR, hence the conversion.
    # `str(...)` keeps this working on OpenCV builds that do not accept Path objects.
    written = cv2.imwrite(
        str(plot_path),
        cv2.cvtColor(result, cv2.COLOR_RGB2BGR),
        [cv2.IMWRITE_JPEG_QUALITY, 100],
    )
    if not written:
        # imwrite signals failure via its return value rather than raising.
        print(f"WARNING: could not write {plot_path}")

episodes: 52
plot/f1/2023-03-02-14h-58m-51s.jpg
Detected 1 with confidence 0.36 at location [775.5, 341.5, 819.2, 383.7]
plot/f1/2023-03-02-16h-53m-07s.jpg
Detected 1 with confidence 0.38 at location [732.3, 381.9, 750.8, 417.4]
plot/f1/2023-03-03-15h-54m-59s.jpg
plot/f1/2023-03-06-16h-13m-25s.jpg
Detected 1 with confidence 0.62 at location [754.4, 468.8, 922.7, 553.5]
plot/f1/2023-03-06-17h-24m-17s.jpg
Detected 1 with confidence 0.42 at location [719.4, 363.4, 807.1, 424.8]
plot/f1/2023-03-07-16h-16m-38s.jpg
Detected 1 with confidence 0.41 at location [650.6, 292.7, 715.4, 342.0]
plot/f1/2023-03-07-17h-57m-28s.jpg
Detected 1 with confidence 0.43 at location [710.5, 494.8, 780.7, 555.4]
plot/f1/2023-03-08-13h-31m-50s.jpg
plot/f1/2023-03-08-14h-52m-01s.jpg
Detected 1 with confidence 0.41 at location [730.6, 279.5, 755.4, 335.2]
plot/f1/2023-03-08-19h-36m-27s.jpg
Detected 1 with confidence 0.35 at location [684.3, 543.6, 752.6, 607.7]
plot/f1/2023-03-08-19h-55m-12s.jpg
Detected 1 with co

True