In [1]:
!wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth -O sam_vit_h.pth
!pip install git+https://github.com/facebookresearch/segment-anything.git

--2025-08-27 14:11:42--  https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 3.163.189.108, 3.163.189.14, 3.163.189.51, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|3.163.189.108|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2564550879 (2.4G) [binary/octet-stream]
Saving to: ‘sam_vit_h.pth’


2025-08-27 14:11:57 (170 MB/s) - ‘sam_vit_h.pth’ saved [2564550879/2564550879]

Collecting git+https://github.com/facebookresearch/segment-anything.git
  Cloning https://github.com/facebookresearch/segment-anything.git to /tmp/pip-req-build-mvw5e2m2
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/segment-anything.git /tmp/pip-req-build-mvw5e2m2
  Resolved https://github.com/facebookresearch/segment-anything.git to commit dca509fe793f601edb92606367a655c15ac00fdf
  Preparing metadata (setup.py) ... done
Building wheels f

In [7]:
import json
import os

import cv2
import numpy as np
import torch
from segment_anything import sam_model_registry, SamPredictor

# ----------------------
# SAM setup
# ----------------------
# Build the ViT-H Segment Anything model from the local checkpoint and wrap it
# in a SamPredictor, which is what the box-prompted inference below talks to.
sam_checkpoint = "sam_vit_h.pth"  # path to SAM weights
model_type = "vit_h"

# Pick the device at runtime: use the GPU when one is visible, otherwise fall
# back to CPU rather than crashing on a CPU-only runtime.
device = "cuda" if torch.cuda.is_available() else "cpu"

sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)

# ----------------------
# Load Grounding DINO JSON
# ----------------------
# `data` is iterated below as a list of records, each read via the keys
# "image" (a path relative to `base_image_path`) and "boxes" (a list of
# detections carrying "box" and optionally "label" / "logit").
json_path = "SDXL_1.0_groundingdino_labeled.json"
# JSON text is UTF-8 by specification; state it explicitly so non-ASCII labels
# are not mis-decoded on platforms whose default locale encoding differs.
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# ----------------------
# Base folder for images
# ----------------------
base_image_path = "/content/drive/MyDrive/SDXL_1.0"  # change to your image folder

# ----------------------
# Process each image
# ----------------------
# For every record in `data`, load the image, hand it to SAM once, then prompt
# SAM with each detection box and collect one binary mask per detection.
# Images that cannot be read are reported and skipped.
final_results = []

for item in data:
    image_path = os.path.join(base_image_path, item["image"])
    image = cv2.imread(image_path)

    if image is None:
        print(f" Image not found: {image_path}")
        continue

    # cv2.imread yields BGR channel order, while SamPredictor.set_image assumes
    # RGB unless told otherwise. Declaring the format lets SAM reorder the
    # channels itself instead of silently encoding a colour-swapped image.
    predictor.set_image(image, image_format="BGR")

    # Image size is constant for all detections of this image.
    h, w = image.shape[:2]

    refined_boxes = []
    for det in item["boxes"]:
        # NOTE(review): treated as [x1, y1, x2, y2] normalized to 0-1. Raw
        # Grounding DINO outputs are usually (cx, cy, w, h) — confirm the JSON
        # was already converted to corner format.
        box = det["box"]
        label = det.get("label", "")
        logit = det.get("logit", 0.0)

        # Convert normalized coordinates to absolute pixel values
        # (truncating toward zero).
        abs_box = [
            int(box[0] * w),
            int(box[1] * h),
            int(box[2] * w),
            int(box[3] * h),
        ]
        input_box = np.array(abs_box)

        # Predict a single mask for this box prompt; the quality score and
        # low-res logits returned alongside it are not used.
        masks, _, _ = predictor.predict(
            box=input_box[None, :],  # shape (1,4)
            multimask_output=False
        )

        # NOTE(review): the mask is serialised as a full nested list of 0/1
        # values, one entry per pixel, so the output JSON grows quickly with
        # image size and detection count.
        refined_boxes.append({
            "box": box,  # keep normalized box
            "logit": logit,
            "label": label,
            "mask": masks[0].astype("uint8").tolist()  # convert to list for JSON
        })

    final_results.append({
        "image": item["image"],
        "boxes": refined_boxes
    })

# ----------------------
# Save new JSON with SAM masks
# ----------------------
# Write the accumulated per-image results to disk as a single JSON document,
# then report where it went.
out_path = "SDXL_1.0_groundingdino_sam_refined.json"
with open(out_path, "w") as out_file:
    json.dump(final_results, out_file)

print(f"Saved SAM-refined JSON to: {out_path}")


Saved SAM-refined JSON to: SDXL_1.0_groundingdino_sam_refined.json
