In [1]:
!pip install ultralytics
!pip install git+https://github.com/facebookresearch/segment-anything.git
!pip install opencv-python-headless


Collecting git+https://github.com/facebookresearch/segment-anything.git
  Cloning https://github.com/facebookresearch/segment-anything.git to /tmp/pip-req-build-vfh7741x
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/segment-anything.git /tmp/pip-req-build-vfh7741x
  Resolved https://github.com/facebookresearch/segment-anything.git to commit dca509fe793f601edb92606367a655c15ac00fdf
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [2]:
!wget https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt -O yolov8n.pt


--2024-12-16 15:03:21--  https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/521807533/732c503e-9fcb-4a82-be7f-106baafbda15?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241216%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241216T150322Z&X-Amz-Expires=300&X-Amz-Signature=f921171cb9307e185a9499d11a50529db28362da073cb44f4ae8f9eb08c3199e&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dyolov8n.pt&response-content-type=application%2Foctet-stream [following]
--2024-12-16 15:03:22--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/521807533/732c503e-9fcb-4a82-be7f-106baafbda15?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=re

In [3]:
!wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth -O sam_vit_h_4b8939.pth


--2024-12-16 15:03:22--  https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 13.227.219.59, 13.227.219.70, 13.227.219.10, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|13.227.219.59|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2564550879 (2.4G) [binary/octet-stream]
Saving to: ‘sam_vit_h_4b8939.pth’


2024-12-16 15:03:33 (223 MB/s) - ‘sam_vit_h_4b8939.pth’ saved [2564550879/2564550879]



In [None]:
import cv2
from ultralytics import YOLO
import numpy as np
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry

# Load YOLOv8 model
yolo_model = YOLO('models/yolov8n.pt')

# Load SAM model
sam_checkpoint = "models/sam_vit_h_4b8939.pth"
sam = sam_model_registry["vit_h"](checkpoint=sam_checkpoint)
mask_generator = SamAutomaticMaskGenerator(sam)

# Input and output paths
input_video_path = "videos/9936784-hd_1920_1080_25fps.mp4"
output_video_path = "output/processed_video.avi"

# Video capture and writer setup
video = cv2.VideoCapture(input_video_path)
frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(video.get(cv2.CAP_PROP_FPS))

fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

while video.isOpened():
    ret, frame = video.read()
    if not ret:
        break

    # YOLO inference
    results = yolo_model(frame)
    detections = results[0].boxes.xyxy.cpu().numpy()

    # Draw YOLO detections
    for box in detections:
        x_min, y_min, x_max, y_max = map(int, box[:4])
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

    # SAM segmentation
    segmentation = mask_generator.generate(frame)
    for mask in segmentation:
      contours, _ = cv2.findContours((mask['segmentation'] * 255).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
      cv2.drawContours(frame, contours, -1, (255, 0, 0), 2)
    # Write the processed frame to the output video
    output_video.write(frame)

# Release resources
video.release()
output_video.release()
cv2.destroyAllWindows()



0: 384x640 1 cat, 1 remote, 104.2ms
Speed: 17.9ms preprocess, 104.2ms inference, 848.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cat, 32.8ms
Speed: 3.4ms preprocess, 32.8ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cat, 18.4ms
Speed: 3.5ms preprocess, 18.4ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cat, 15.5ms
Speed: 4.4ms preprocess, 15.5ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cat, 13.8ms
Speed: 4.0ms preprocess, 13.8ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cat, 1 cup, 11.4ms
Speed: 3.3ms preprocess, 11.4ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cup, 24.3ms
Speed: 4.1ms preprocess, 24.3ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cat, 1 cup, 12.4ms
Speed: 3.3ms preprocess, 12.4ms inference, 2.2ms postprocess per image at shape (1, 