# SAM2 Segmentation with Webcam Demo

This notebook demonstrates how to use SAM2 segmentation with frames captured from a webcam.

In [None]:
# Install required packages if not already installed

import sys
import subprocess

def install_package(package):
    """Install a package with pip, using the interpreter that runs this kernel.

    Args:
        package (str): Requirement passed verbatim to ``pip install``
            (a package name, or a VCS URL such as ``git+https://...``).

    Raises:
        subprocess.CalledProcessError: If the pip process exits with a
            non-zero status.
    """
    # Run pip as a module of the current interpreter so the package lands in
    # the same environment the kernel imports from
    pip_command = [sys.executable, "-m", "pip", "install"]
    pip_command.append(package)
    subprocess.check_call(pip_command)

# Check if segment-anything is installed; install it on demand.
# NOTE(review): this notebook is about SAM2, but the package installed here is
# the original segment-anything repository — confirm this is the dependency
# that sam2_segmentation actually needs.
try:
    import segment_anything
except ImportError:
    print("Installing segment-anything...")
    install_package("git+https://github.com/facebookresearch/segment-anything.git")

    # A package installed while the kernel is running can be hidden by stale
    # import-system caches. Refresh them and import again so that a broken
    # install fails here instead of being reported as a success.
    import importlib
    importlib.invalidate_caches()
    import segment_anything

print("Required packages are installed.")

In [None]:
# Import required libraries

import cv2
import numpy as np
import torch
import time
from IPython.display import display, clear_output
import matplotlib.pyplot as plt

# Make our custom SAM2 segmentation class importable.
# The working directory is searched (as before) and, when it exists, its
# "src" sub-directory as well, so the import works whether the notebook is
# started from the project root or from inside src/.
# NOTE(review): the location of sam2_segmentation.py is not visible from this
# notebook — confirm which of the two directories is the intended one.
import sys
from pathlib import Path

notebook_dir = Path().resolve()
for candidate_dir in (notebook_dir, notebook_dir / "src"):
    # Skip missing directories and avoid piling up duplicate entries when the
    # cell is re-run
    if candidate_dir.is_dir() and str(candidate_dir) not in sys.path:
        sys.path.append(str(candidate_dir))

from sam2_segmentation import SAM2Segmentation

print("Libraries imported successfully.")

In [None]:
# Create the SAM2 segmentation wrapper (base model, chosen for speed).
# NOTE(review): the two arguments look like a checkpoint path and a model-type
# tag — confirm against the SAM2Segmentation constructor.
sam_seg = SAM2Segmentation("src/sam2.1_b.pt", "vit_b")

# Load the model once and report the outcome; a falsy return value from
# load_model() is treated as a failure
print("SAM2 model loaded successfully!" if sam_seg.load_model() else "Failed to load SAM2 model")

In [None]:
# Function to capture a single frame from webcam

def capture_frame(webcam_id=0, width=640, height=480):
    """
    Capture a single frame from the webcam.

    The capture device is always released before returning, including when
    the device cannot be opened or reading from it raises.

    Args:
        webcam_id (int): ID of the webcam to use (default: 0)
        width (int): Requested frame width in pixels (default: 640)
        height (int): Requested frame height in pixels (default: 480)

    Returns:
        np.ndarray: Captured frame, or None if failed
    """
    cap = cv2.VideoCapture(webcam_id)

    try:
        if not cap.isOpened():
            print(f"Cannot open webcam {webcam_id}")
            return None

        # Request a resolution; this is only a hint to the driver, so the
        # returned frame may still have a different size
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

        # Grab exactly one frame
        ret, frame = cap.read()
    finally:
        # Free the device even on early return or exception so that later
        # cells (and other applications) can open the webcam again
        cap.release()

    if not ret or frame is None:
        print("Failed to capture frame")
        return None

    return frame

In [None]:
# Capture a frame and perform segmentation

print("Capturing frame from webcam...")
frame = capture_frame()

if frame is not None:
    print("Frame captured successfully. Performing segmentation...")

    # Time only the segmentation call; perf_counter is the high-resolution
    # clock meant for measuring short durations
    start_time = time.perf_counter()
    result = sam_seg.segment_frame(frame)
    inference_time = time.perf_counter() - start_time

    if result is not None:
        print(f"Segmentation completed in {inference_time:.2f} seconds")

        # Visualize results
        vis_frame = sam_seg.visualize_segmentation(frame, result)

        # Convert BGR to RGB for matplotlib
        # (assumes the visualisation keeps OpenCV's BGR channel order — TODO confirm)
        vis_frame_rgb = cv2.cvtColor(vis_frame, cv2.COLOR_BGR2RGB)
        original_frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Display original and segmented frame side by side
        fig, axes = plt.subplots(1, 2, figsize=(15, 8))

        axes[0].imshow(original_frame_rgb)
        axes[0].set_title("Original Frame")
        axes[0].axis('off')

        axes[1].imshow(vis_frame_rgb)
        axes[1].set_title(f"SAM2 Segmentation Results (Time: {inference_time:.2f}s)")
        axes[1].axis('off')

        plt.tight_layout()
        plt.show()

        # Print segmentation info
        masks = result.get("masks", None)
        scores = result.get("scores", None)
        if scores is None:
            scores = np.array([])

        if masks is not None and len(masks) > 0:
            print(f"\nGenerated {len(masks)} segmentation masks:")
            # Iterate over the masks (not zip(masks, scores)) so that no mask
            # is silently dropped when scores are missing or shorter
            for i, mask in enumerate(masks):
                # Count foreground pixels by thresholding. This gives a pixel
                # count for boolean, 0/1, 0/255 and probability masks alike,
                # whereas summing a 0/255 uint8 mask over-counts by 255x.
                mask_area = int(np.sum(mask > 0.5))
                if i < len(scores):
                    print(f"  - Mask {i+1}: Score {scores[i]:.3f}, Area {mask_area} pixels")
                else:
                    print(f"  - Mask {i+1}: Score n/a, Area {mask_area} pixels")
        else:
            print("No segmentation masks generated.")
    else:
        print("Segmentation failed.")
else:
    print("Failed to capture frame from webcam.")

## Real-time Webcam Segmentation

The following cell will start a real-time segmentation demo. Press the stop button (⏹️) in Jupyter to stop the demo.

In [None]:
# Real-time webcam segmentation demo
# Note: This will run continuously until stopped manually

print("Starting real-time webcam segmentation demo...")
print("Press the stop button (⏹️) in Jupyter to stop the demo.")

# Open webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Cannot open webcam")
else:
    try:
        while True:
            # Capture frame
            ret, frame = cap.read()
            if not ret:
                print("Failed to capture frame")
                break

            # Time only the segmentation call; perf_counter is the
            # high-resolution clock meant for measuring short durations
            start_time = time.perf_counter()
            result = sam_seg.segment_frame(frame)
            inference_time = time.perf_counter() - start_time

            if result is not None:
                # Visualize results
                vis_frame = sam_seg.visualize_segmentation(frame, result)

                # Add FPS counter (derived from inference time alone, so it
                # excludes capture and rendering overhead)
                fps = 1.0 / inference_time if inference_time > 0 else 0
                cv2.putText(vis_frame, f"SAM2 FPS: {fps:.1f}", (10, 30),
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # Convert to RGB for matplotlib
                vis_frame_rgb = cv2.cvtColor(vis_frame, cv2.COLOR_BGR2RGB)

                # Replace the previous output with the new frame
                clear_output(wait=True)
                fig, ax = plt.subplots(figsize=(12, 8))
                ax.imshow(vis_frame_rgb)
                ax.set_title("Real-time SAM2 Segmentation")
                ax.axis('off')
                plt.show()
                # Close this iteration's figure explicitly; otherwise one
                # figure per frame stays alive on backends that do not
                # auto-close after show()
                plt.close(fig)

                # Print segmentation info
                masks = result.get("masks", None)
                if masks is not None:
                    print(f"Generated {len(masks)} masks (FPS: {fps:.1f})")
                else:
                    print(f"No masks generated (FPS: {fps:.1f})")
            else:
                print("Segmentation failed")

    except KeyboardInterrupt:
        # Raised when the Jupyter stop button interrupts the kernel
        print("Demo interrupted by user")
    except Exception as e:
        print(f"Error during demo: {e}")
    finally:
        # Clean up: free the webcam and drop any figure still open
        cap.release()
        plt.close('all')

## Interactive Point-based Segmentation

SAM2 excels at point-based segmentation. Interactive point selection is not implemented in this notebook; the cell below only describes how it would work in a full implementation:

In [None]:
# Placeholder for interactive point-based segmentation.
# No UI components are wired up here; the cell only prints a description of
# what a full implementation would offer.
for message in (
    "Interactive point-based segmentation:",
    "In a full implementation, you would be able to click on the image to select points for segmentation.",
    "Points can be labeled as foreground (positive) or background (negative) to guide the segmentation.",
):
    print(message)