In [1]:
!pip install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
!git clone https://github.com/facebookresearch/sam3.git
%cd sam3
!pip install -e ".[notebooks]"
%cd ..
exit()

Looking in indexes: https://download.pytorch.org/whl/cu126
Collecting torch==2.7.0
  Downloading https://download.pytorch.org/whl/cu126/torch-2.7.0%2Bcu126-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting nvidia-cudnn-cu12==9.5.1.17 (from torch==2.7.0)
  Downloading https://pypi.nvidia.com/nvidia-cudnn-cu12/nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl (571.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m571.0/571.0 MB[0m [31m59.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting nvidia-cusparselt-cu12==0.6.3 (from torch==2.7.0)
  Downloading https://pypi.nvidia.com/nvidia-cusparselt-cu12/nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl (156.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.8/156.8 MB[0m [31m117.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting nvidia-nccl-cu12==2.26.2 (from torch==2.7.0)
  Downloading https://pypi.nvidia.com/nvidia-nccl-cu12/nvidia_ncc

In [1]:
import torch
import torchvision
import numpy as np
!python --version
print("numpy version:", np.__version__)
print("PyTorch version:", torch.__version__)
print("Torchvision version:", torchvision.__version__)
print("CUDA is available:", torch.cuda.is_available())

Python 3.12.12
numpy version: 1.26.4
PyTorch version: 2.7.0+cu126
Torchvision version: 0.22.0+cu126
CUDA is available: True


In [2]:
import os
from kaggle_secrets import UserSecretsClient
from importlib import resources

# Export the Kaggle secret named "HF_TOKEN" as an environment variable of the same name.
# NOTE(review): presumably read downstream when model weights are fetched — confirm.
secret_label = "HF_TOKEN"
secrets_client = UserSecretsClient()
secret_value = secrets_client.get_secret(secret_label)
os.environ[secret_label] = secret_value

# Location of the BPE vocabulary file, resolved relative to the importable `sam3` package.
sam3_package_root = resources.files("sam3")
path = sam3_package_root.joinpath("sam3/assets/bpe_simple_vocab_16e6.txt.gz")


In [6]:
from PIL import Image
from sam3.model_builder import build_sam3_image_model
from sam3.model.sam3_image_processor import Sam3Processor
import torch
import torch.multiprocessing as mp
from glob import glob
from tqdm import tqdm
import shutil
from IPython.display import FileLink



In [8]:

# Build the SAM3 image model (using the BPE vocabulary at `path`) and wrap it in a processor.
model = build_sam3_image_model(bpe_path=path)
processor = Sam3Processor(model)

# Run a single frame through the processor with a text prompt.
sample_frame_path = "/kaggle/input/volleyball-images/games-1-2-hard-data/games-1-2-hard-data/game1_set1_frame_14760.jpg"
image = Image.open(sample_frame_path)
inference_state = processor.set_image(image)
output = processor.set_text_prompt(state=inference_state, prompt="small volleyball in motion")

# Pull the individual prediction fields out of the result mapping.
masks = output["masks"]
boxes = output["boxes"]
scores = output["scores"]

In [9]:
# Display the predicted boxes next to their scores as the cell's output.
(boxes, scores)


(tensor([[923.3210, 216.5515, 946.2879, 249.5456]], device='cuda:0'),
 tensor([0.8165], device='cuda:0'))

In [10]:
# Shut the kernel down after the single-image smoke test.
# NOTE(review): presumably done to release the GPU memory held by `model` before the
# multi-process run loads its own model copies — confirm. All kernel state is lost here,
# so the import and secret/path cells must be re-run before the cells that follow.
exit()

In [3]:
# --- Configuration ---
# Dataset locations: frames are read from INPUT_DIR, the YOLO dataset is written to OUTPUT_DIR.
INPUT_DIR = "/kaggle/input/volleyball-images/hard-data-v3"
OUTPUT_DIR = "/kaggle/working/yolo_dataset_hard_data_v3"

# Detection settings: text prompt given to the model and the minimum score a box needs to be kept.
PROMPT = "small volleyball in motion"
CONF_THRESHOLD = 0.5

# Runtime settings: number of worker processes (one per GPU) and the BPE vocabulary location.
NUM_GPUS = 2
BPE_PATH = path

In [4]:
def setup_folders(base_path):
    """Create the ``images`` and ``labels`` subfolders under ``base_path``.

    Missing parent directories are created as well; folders that already
    exist are left untouched.
    """
    for subfolder in ("images", "labels"):
        target = os.path.join(base_path, subfolder)
        os.makedirs(target, exist_ok=True)

def create_download_link(zip_name, folder_path):
    """Zip ``folder_path`` into ``<zip_name>.zip`` and return a ``FileLink`` to the archive.

    Args:
        zip_name: Archive path without the ``.zip`` extension.
        folder_path: Directory whose contents are archived.

    Returns:
        A ``FileLink`` pointing at ``<zip_name>.zip``.
    """
    print(f"Zipping {folder_path}... this may take a moment.")
    shutil.make_archive(zip_name, 'zip', folder_path)

    # Report the archive size in MiB before handing back the link.
    archive_path = f"{zip_name}.zip"
    size_mb = os.path.getsize(archive_path) / (1024*1024)
    print(f"Zip created: {archive_path} ({size_mb:.2f} MB)")
    return FileLink(archive_path)

In [6]:
%%writefile worker.py
import os
import torch
from PIL import Image
from tqdm import tqdm

def _xyxy_to_yolo_line(box, img_w, img_h, class_id=0):
    """Turn one pixel-space ``(x1, y1, x2, y2)`` box into a YOLO label line.

    The corners are clamped to the image rectangle first so the normalised
    centre/size values cannot leave the [0, 1] range. Returns ``None`` when
    the clamped box has no area.
    """
    x1, y1, x2, y2 = box
    x1 = min(max(x1, 0.0), img_w)
    x2 = min(max(x2, 0.0), img_w)
    y1 = min(max(y1, 0.0), img_h)
    y2 = min(max(y2, 0.0), img_h)
    if x2 <= x1 or y2 <= y1:
        return None

    x_center = ((x1 + x2) / 2) / img_w
    y_center = ((y1 + y2) / 2) / img_h
    width = (x2 - x1) / img_w
    height = (y2 - y1) / img_h
    return f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"


def process_images_on_gpu(gpu_id, image_paths, bpe_path, prompt, conf_threshold, output_dir):
    """Label a list of images with SAM3 on one GPU and write a YOLO-style dataset.

    For every image a text-prompted prediction is run; each box whose score is
    at least ``conf_threshold`` becomes one class-0 line in
    ``<output_dir>/labels/<stem>.txt`` (the file is created even when no box
    qualifies), and the RGB-converted image is saved to
    ``<output_dir>/images/<file name>``.

    Args:
        gpu_id: CUDA device index used by this worker.
        image_paths: Paths of the images to process.
        bpe_path: BPE vocabulary path forwarded to ``build_sam3_image_model``.
        prompt: Text prompt passed to ``set_text_prompt``.
        conf_threshold: Minimum score for a box to be written.
        output_dir: Dataset root containing ``images/`` and ``labels/``.

    Errors raised while handling a single image are printed and the loop
    continues with the next image.
    """
    # Bind this process to its GPU before the model is built.
    device = torch.device(f"cuda:{gpu_id}")
    torch.cuda.set_device(device)

    # Imported inside the function so the import happens in the worker process.
    from sam3.model_builder import build_sam3_image_model
    from sam3.model.sam3_image_processor import Sam3Processor

    model = build_sam3_image_model(bpe_path=bpe_path).to(device)
    processor = Sam3Processor(model)

    # Make sure the output layout exists even if the caller did not prepare it.
    images_dir = os.path.join(output_dir, "images")
    labels_dir = os.path.join(output_dir, "labels")
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    for img_path in tqdm(image_paths, desc=f"GPU {gpu_id}"):
        file_name = os.path.basename(img_path)
        name_no_ext = os.path.splitext(file_name)[0]

        try:
            # Close the source file right after decoding instead of leaving the
            # handle open until garbage collection.
            with Image.open(img_path) as source:
                image = source.convert("RGB")
            w, h = image.size

            inference_state = processor.set_image(image)
            output = processor.set_text_prompt(state=inference_state, prompt=prompt)

            # One device-to-host transfer per image rather than one per box.
            boxes = output["boxes"].detach().cpu().tolist()
            scores = output["scores"].detach().cpu().tolist()

            label_lines = []
            for box, score in zip(boxes, scores):
                if score < conf_threshold:
                    continue
                line = _xyxy_to_yolo_line(box, w, h)
                if line is not None:
                    label_lines.append(line)

            label_path = os.path.join(labels_dir, f"{name_no_ext}.txt")
            with open(label_path, "w") as f:
                f.writelines(label_lines)

            # Save image to the local destination
            image.save(os.path.join(images_dir, file_name))
        except Exception as e:
            # Best effort: report the failure and keep going with the remaining images.
            print(f"Error processing {img_path}: {e}")

Overwriting worker.py


In [7]:
from worker import process_images_on_gpu
# 1. Setup
setup_folders(OUTPUT_DIR)

# Sorted so the assignment of images to workers is the same on every run.
all_images = sorted(
    glob(os.path.join(INPUT_DIR, "*.jpg")) + glob(os.path.join(INPUT_DIR, "*.png"))
)
print(f"Found {len(all_images)} images.")
if not all_images:
    # Without this guard chunk_size would be 0 and range() below would fail with an
    # unrelated-looking "arg 3 must not be zero" error.
    raise FileNotFoundError(f"No .jpg or .png images found in {INPUT_DIR}")

# 2. Split the images into contiguous, ceil-sized chunks, one per GPU.
# Ceil-sized chunks can never produce more than NUM_GPUS chunks, so no truncation is needed.
chunk_size = (len(all_images) + NUM_GPUS - 1) // NUM_GPUS
img_chunks = [all_images[i:i + chunk_size] for i in range(0, len(all_images), chunk_size)]

# 3. Launch one worker process per chunk; the chunk index doubles as the GPU id.
# Workers are started with the 'spawn' method, so each one runs in a fresh interpreter.
mp.set_start_method('spawn', force=True)
processes = []

for gpu_id, chunk in enumerate(img_chunks):
    p = mp.Process(target=process_images_on_gpu, args=(
            gpu_id, chunk, BPE_PATH, PROMPT, CONF_THRESHOLD, OUTPUT_DIR
    ))
    p.start()
    processes.append(p)

for p in processes:
    p.join()

# 4. A worker that died (e.g. while loading the model) leaves a non-zero exit code;
# surface that instead of reporting success on a partial dataset.
failed_workers = [
    (gpu_id, p.exitcode) for gpu_id, p in enumerate(processes) if p.exitcode != 0
]
if failed_workers:
    raise RuntimeError(f"Worker(s) failed as (gpu_id, exit code): {failed_workers}")

print(f"Done! Dataset ready in {OUTPUT_DIR}")

Found 2639 images.


GPU 0: 100%|██████████| 1320/1320 [44:53<00:00,  2.04s/it]
GPU 1: 100%|██████████| 1319/1319 [47:33<00:00,  2.16s/it]


Done! Dataset ready in /kaggle/working/yolo_dataset_hard_data_v3


In [8]:
# Archive the generated dataset. Both values are derived from OUTPUT_DIR so the zip step
# cannot drift from the folder the labelling run actually wrote to.
folder_to_zip = OUTPUT_DIR  # The folder you created
output_zip_name = os.path.basename(os.path.normpath(OUTPUT_DIR))  # Zip name = dataset folder name
create_download_link(output_zip_name, folder_to_zip)

Zipping /kaggle/working/yolo_dataset_hard_data_v3... this may take a moment.
Zip created: yolo_dataset_hard_data_v3.zip (543.58 MB)


In [13]:
# Remove a dataset folder left over from an earlier run. Best effort: nothing happens
# (and no error is printed) when the folder does not exist, so the cell is safe on a fresh run.
shutil.rmtree("yolo_dataset_game2-set1", ignore_errors=True)

In [25]:
from transformers import Sam3Processor, Sam3Model

ImportError: cannot import name 'Sam3Processor' from 'transformers' (/usr/local/lib/python3.12/dist-packages/transformers/__init__.py)