# SAM + CLIP Segmentation Demo
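This notebook demonstrates open-vocabulary segmentation by combining SAM (mask generation) with CLIP (text-based mask filtering).
You can easily change the input image, the text prompt, the SAM grid spacing, the CLIP threshold, and the mask-size filter in the Parameters cell.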

In [ ]:
# Imports
import torch
from sam_clip.models import load_sam, load_clip
from sam_clip.sam_inference import generate_sam_masks
from sam_clip.clip_filtering import filter_masks_with_clip

In [ ]:
# Device setup
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

In [ ]:
# Load models (once)
# Each loader's result is unpacked into a (processor, model) pair; these four
# names are what the SAM and CLIP calls further down receive.
# NOTE(review): load_sam() is called without `device`, while load_clip() is
# given it — confirm load_sam places the SAM model on the intended device.
sam_processor, sam_model = load_sam()
clip_processor, clip_model = load_clip(device)

## Parameters

In [ ]:
# Image path and prompt
# NOTE(review): the sample image is named cat.jpeg but the prompt describes a
# dog — confirm this mismatch is intentional for the demo.
image_path = '../data/cat.jpeg'  # replace with your image
prompt = 'dog sitting on grass'

# SAM grid spacing (smaller = more masks, slower)
# Presumably the distance between SAM prompt points — TODO confirm the unit
# in generate_sam_masks.
grid_spacing = 50

# CLIP filtering
# NOTE(review): how the threshold and size mode are applied is decided inside
# filter_masks_with_clip — verify there before tuning.
clip_threshold = 35  # adjust from 0-100
size_mode = 's'      # 's'=small, 'm'=medium, 'l'=large, 'mixed'

## Generate SAM Masks

In [ ]:
# Call generate_sam_masks on the configured image path with the loaded SAM
# processor/model; the result is unpacked into two values, named here as the
# original image and the full set of candidate masks.
# NOTE(review): the concrete types of orig_img / all_masks are defined in
# sam_clip.sam_inference — confirm there before relying on them.
orig_img, all_masks = generate_sam_masks(
    image_path,
    sam_processor,
    sam_model,
    grid_spacing=grid_spacing
)

## Filter Masks with CLIP

In [ ]:
# Pass the SAM output together with the text prompt and the CLIP
# processor/model to filter_masks_with_clip; the result is unpacked into three
# values: a result image, the filtered masks, and their scores.
# NOTE(review): presumably masks are kept or dropped based on clip_threshold
# and size_mode — confirm the exact rule in sam_clip.clip_filtering.
result_img, filtered_masks, scores = filter_masks_with_clip(
    orig_img,
    all_masks,
    text_prompt=prompt,
    clip_processor=clip_processor,
    clip_model=clip_model,
    device=device,
    clip_threshold=clip_threshold,
    size_mode=size_mode,
    preview=True  # show crops interactively
)

## Display Final Overlay

In [ ]:
# Display the result image returned by the CLIP filtering call.
# NOTE(review): presumably a PIL image, since it exposes .show() — confirm;
# in that case the image may open in an external viewer rather than inline.
result_img.show()