In [None]:
!pip install ultralytics supervision
!pip install git+https://github.com/NVlabs/describe-anything


In [None]:
!pip install --upgrade numpy




In [None]:
import time
import psutil
from IPython.display import display, Javascript

# One-time setup of the notebook-wide baseline (wall-clock start and RAM in use).
# The globals() guard keeps the first baseline when this cell is executed again.
if 'start_time' not in globals():
    start_time = time.time()
    initial_ram = psutil.virtual_memory().used / (1 << 30)  # bytes -> GB (1024 ** 3)
    print("Tracking started for all cells...")

In [None]:
import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from ultralytics import SAM, YOLO
from dam.describe_anything_model import DescribeAnythingModel

# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'



In [None]:
# Load the "sam2_b.pt" checkpoint, then move the model to the selected device.
sam = SAM("sam2_b.pt")
sam = sam.to(DEVICE)
print("SAM 2 model loaded")

In [None]:
# Instantiate the Describe Anything Model from the "nvidia/DAM-3B" weights.
# conv_mode / prompt_mode are passed through unchanged to the model wrapper.
dam = DescribeAnythingModel(model_path="nvidia/DAM-3B", conv_mode="v1", prompt_mode="full+crop")
print("DAM model loaded")


In [None]:
image_path = "/content/1_v0Bm-HQxWtpbQ0Yq463uqw.jpg"  # Replace with your image path

# A None result from cv2.imread is treated as a missing/unreadable file,
# so fail loudly here instead of further down the notebook.
image_bgr = cv2.imread(image_path)
if image_bgr is None:
    raise FileNotFoundError(f"Image not found at {image_path}")

# Convert the BGR array to RGB; `image` is what the later cells display and segment.
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

# Show the input picture without axis ticks.
plt.figure(figsize=(10, 10))
plt.imshow(image)
plt.title("Original Image")
plt.axis('off')
plt.show()


In [None]:
# Run SAM on the RGB image; only the first (and only) result is used.
results = sam(image)
result = results[0]

# Ultralytics sets `result.masks` to None when nothing was segmented, so guard
# before touching `.data`. In that case fall back to an empty (0, H, W) boolean
# array so the cells that iterate over `masks` simply loop zero times.
if result.masks is None:
    masks_tensor = None
    masks = np.zeros((0,) + image.shape[:2], dtype=bool)
else:
    masks_tensor = result.masks.data  # (num_masks, H, W)
    masks = masks_tensor.cpu().numpy().astype(bool)

print(f"Generated {len(masks)} masks")


In [None]:
def overlay_mask(image, mask, color=(0, 255, 0), alpha=0.5):
    """Return a copy of ``image`` with ``color`` alpha-blended into the masked pixels.

    Args:
        image: Image array; it is copied, never modified in place.
        mask: Array cast to bool and used to select the pixels to tint.
        color: Per-channel colour blended into the selected pixels.
        alpha: Blend weight of ``color``; the image keeps weight ``1 - alpha``.

    Returns:
        The tinted copy. Blended pixels are cast to ``np.uint8``.
    """
    selected = mask.astype(bool)
    blended = image.copy()
    tint = np.array(color) * alpha
    blended[selected] = (blended[selected] * (1 - alpha) + tint).astype(np.uint8)
    return blended

# Palette cycled over the masks so consecutive masks get different tints.
colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]

# Layer every mask onto a working copy of the image, one tint per mask.
image_vis = image.copy()
for i, mask in enumerate(masks):
    image_vis = overlay_mask(image_vis, mask, color=colors[i % len(colors)], alpha=0.4)

# Show the combined overlay without axis ticks.
plt.figure(figsize=(12, 12))
plt.imshow(image_vis)
plt.title("Image with SAM Masks Overlayed")
plt.axis('off')
plt.show()


In [None]:
def numpy_to_pil(img_np):
    """Wrap a NumPy image array in a PIL ``Image`` via ``Image.fromarray``."""
    pil_image = Image.fromarray(img_np)
    return pil_image

# Text returned by DAM for every mask that could be cropped, in mask order.
# Masks without any contour are skipped, so this list can be shorter than `masks`.
descriptions = []

for i, mask in enumerate(masks):
    # Convert the boolean mask to a 0/255 uint8 image for the OpenCV calls below
    mask_uint8 = mask.astype(np.uint8) * 255

    # Outline every connected region of the mask
    contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Empty mask: nothing to crop or describe
        continue

    # Bound ALL contours together. A mask may consist of several disconnected
    # regions; bounding only contours[0] would crop to a single fragment.
    x, y, w, h = cv2.boundingRect(np.vstack(contours))

    # Crop image and mask to the bounding box
    cropped_img = image[y:y+h, x:x+w]
    cropped_mask = mask_uint8[y:y+h, x:x+w]

    # Zero out every pixel of the crop that lies outside the mask
    masked_crop = cv2.bitwise_and(cropped_img, cropped_img, mask=cropped_mask)

    # Display the cropped masked object, then release the figure so figures
    # do not accumulate in memory across iterations
    fig = plt.figure(figsize=(4, 4))
    plt.title(f"Object {i+1}")
    plt.imshow(masked_crop)
    plt.axis('off')
    plt.show()
    plt.close(fig)

    # Convert crop and mask to PIL Images for DAM
    cropped_pil = numpy_to_pil(masked_crop)
    cropped_mask_pil = numpy_to_pil(cropped_mask)

    # Ask DAM for a description of the cropped region
    description = dam.get_description(
        image_pil=cropped_pil,  # Pass cropped region
        mask_pil=cropped_mask_pil,  # Mask for the cropped area
        query="<image> Describe this object in detail.",
        temperature=0.2,
        top_p=0.9,
        num_beams=1,
        max_new_tokens=512
    )

    print(f"Description for Object {i+1}:\n{description}\n")
    descriptions.append(description)


In [None]:
import nbformat

def clean_notebook(input_path, output_path=None):
    """Remove the ``widgets`` entry from a notebook's metadata and save it.

    Args:
        input_path: Notebook file to read (parsed as nbformat version 4).
        output_path: Where to write the result. When ``None`` the input
            file is overwritten.
    """
    destination = input_path if output_path is None else output_path
    notebook = nbformat.read(input_path, as_version=4)
    metadata = notebook.get('metadata', {})
    if 'widgets' in metadata:
        del metadata['widgets']
    nbformat.write(notebook, destination)

# Strip the widget metadata from the saved notebook; with no output_path
# given, the file is rewritten in place.
clean_notebook(input_path="SAM2_WO_YOLO.ipynb")