In [1]:
!pip install torch torchvision torchaudio ultralytics opencv-python
!pip install segment-anything opencv-python





[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import cv2
import torch
from ultralytics import YOLO
from segment_anything import SamPredictor, sam_model_registry
import numpy as np

In [4]:
!pip install ultralytics





[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
from ultralytics import YOLO


In [7]:
# Load the YOLOv8 segmentation model; the weights file is downloaded on first
# use when it is not already present locally.
yolo_model = YOLO('yolov8s-seg.pt')  # or a custom-trained model path

# Load SAM model
sam_checkpoint = "sam_vit_b_01ec64.pth"  # Replace with your SAM model path
sam_model_type = "vit_b"  # Must match the checkpoint: "vit_b", "vit_l" or "vit_h"
sam = sam_model_registry[sam_model_type](checkpoint=sam_checkpoint)
# Run SAM on the GPU when one is available (same policy as the robot-camera
# cell further down); on CPU-only machines this is a no-op.
sam.to("cuda" if torch.cuda.is_available() else "cpu")
sam_predictor = SamPredictor(sam)

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-seg.pt to 'yolov8s-seg.pt'...


100%|██████████| 22.8M/22.8M [00:01<00:00, 15.4MB/s]


In [8]:
# Function to detect shapes using YOLO
def detect_shapes(image, conf_threshold=0.5):
    """Detect objects with the module-level YOLO model and return their boxes.

    Args:
        image: Image to run inference on (anything ``yolo_model`` accepts,
            e.g. a BGR array returned by ``cv2.imread``).
        conf_threshold: Detections with a confidence less than or equal to
            this value are discarded. Defaults to 0.5.

    Returns:
        A list of ``(x1, y1, x2, y2)`` integer tuples, one per kept detection,
        in pixel coordinates of ``image``. Empty when nothing is detected.
    """
    # Ultralytics YOLOv8 returns one Results object per input image; the old
    # YOLOv5-hub style ``results.pred`` attribute does not exist on that list.
    results = yolo_model(image)
    if not results:
        return []

    boxes = results[0].boxes  # only one image was passed in
    if boxes is None:
        return []

    detections = []
    # ``xyxy`` holds corner coordinates, ``conf`` the matching confidences.
    for corners, conf in zip(boxes.xyxy.tolist(), boxes.conf.tolist()):
        if conf > conf_threshold:
            x1, y1, x2, y2 = (int(value) for value in corners)
            detections.append((x1, y1, x2, y2))

    return detections

In [9]:
# Function to perform SAM segmentation on detected shapes
def segment_with_sam(image, boxes):
    """Run SAM once per bounding box, using the box as the prompt.

    Args:
        image: Image handed to ``sam_predictor.set_image``. SAM's predictor
            treats the array as RGB unless told otherwise, so convert images
            loaded with OpenCV (BGR) before calling this function.
        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes in pixel coordinates.

    Returns:
        A list with one entry per box, each being exactly what
        ``sam_predictor.predict`` returned for that box.
    """
    boxes = list(boxes)
    if not boxes:
        # Computing the image embedding is the expensive step; skip it when
        # there is nothing to segment.
        return []

    sam_predictor.set_image(image)

    masks = []
    for x1, y1, x2, y2 in boxes:
        box_np = np.array([x1, y1, x2, y2])
        # The box must be passed by keyword: the first positional parameter
        # of ``predict`` is ``point_coords``, which requires point labels.
        masks.append(sam_predictor.predict(box=box_np))

    return masks

In [10]:
# Main Function
def main(image_path):
    """Detect objects in an image with YOLO, segment them with SAM, and show the result.

    Args:
        image_path: Path of the image file to process.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``image_path``.
    """
    # cv2.imread signals failure by returning None instead of raising.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Step 1: Detect shapes using YOLO (works directly on the BGR image)
    boxes = detect_shapes(image)

    # Step 2: Segment detected shapes using SAM. SAM's predictor expects RGB
    # input by default, whereas OpenCV loads images as BGR.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    predictions = segment_with_sam(image_rgb, boxes)

    # Step 3: Visualize results (bounding boxes + masks)
    for (x1, y1, x2, y2), prediction in zip(boxes, predictions):
        # Draw bounding box
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Each prediction is the (masks, scores, low-res logits) tuple returned
        # by SamPredictor.predict; keep the highest-scoring candidate mask.
        candidate_masks, scores, _ = prediction
        best_mask = candidate_masks[int(np.argmax(scores))].astype(bool)

        # SAM masks cover the whole frame, so paint the mask at full size
        # rather than into the box region.
        mask_img = np.zeros_like(image)
        mask_img[best_mask] = (0, 255, 0)

        # Overlay the mask on the original image
        image = cv2.addWeighted(image, 1, mask_img, 0.6, 0)

    # Show the result
    cv2.imshow("Detection and Segmentation", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [None]:
# Entry point for the single-image YOLO + SAM demo defined by main().
if __name__ == "__main__":
    main("your_image_path.jpg")  # Placeholder path: replace with a real image file

In [None]:
# Import required libraries for both camera streaming and segmentation
from pyniryo2 import *  # For controlling Niryo robot and accessing camera
import pyniryo  # Additional functionalities from pyniryo2
import cv2 as cv  # For image display and manipulation
import torch  # For running the SAM model on GPU/CPU
import numpy as np  # For handling arrays and masks
from segment_anything import SamPredictor, sam_model_registry  # SAM model utilities for segmentation
import time  # For timing and frame rate adjustments

# Connect to the Niryo robot at the hard-coded address "10.10.10.10".
# NOTE(review): NiryoRos and Vision are not imported by name; presumably they
# come from the `from pyniryo2 import *` star import above. Confirm.
ros_instance = NiryoRos("10.10.10.10")

# Vision wrapper built on that connection; capture_and_process() uses it to
# fetch camera frames and camera intrinsics.
vision = Vision(ros_instance)

# SAM configuration. Note that `sam_checkpoint` and `sam` below rebind names
# that the earlier YOLO + SAM cell also assigns.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if available, otherwise use CPU
sam_checkpoint = "sam_vit_b_01ec64.pth"  # Path to the pre-trained SAM checkpoint file
model_type = "vit_b"  # Registry key; must correspond to the checkpoint above

# Build the SAM model from the checkpoint, place it on the selected device,
# and wrap it in a predictor (named `predictor`, distinct from the earlier
# cell's `sam_predictor`).
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device)  # Move the SAM model to the specified device (GPU/CPU)
predictor = SamPredictor(sam)  # Predictor used by capture_and_process()

# Define a function to detect circular objects using color thresholding
def detect_circles_by_color(image, lower_color=(0, 100, 100), upper_color=(10, 255, 255), min_vertices=5):
    """Find roughly circular blobs of a given color in a BGR image.

    The image is thresholded in HSV space, cleaned up with morphological
    close/open operations, and every external contour whose polygonal
    approximation has at least ``min_vertices`` vertices is reported through
    its minimum enclosing circle.

    Args:
        image: BGR image (it is converted with ``cv.COLOR_BGR2HSV``).
        lower_color: Inclusive lower HSV bound passed to ``cv.inRange``.
            The default selects the low-hue end of red.
        upper_color: Inclusive upper HSV bound passed to ``cv.inRange``.
        min_vertices: Minimum number of vertices the approximated contour
            must have to be accepted as a circle. Defaults to 5.

    Returns:
        A list of ``(x, y, radius)`` integer tuples (values truncated with
        ``int``), one per accepted contour.

    Note:
        For 8-bit images OpenCV stores hue in the range 0-179, and red sits
        at both ends of that range. The default bounds only cover hues 0-10;
        call the function a second time with bounds near 170-179 to catch the
        other half if needed.
    """
    # Convert the image to HSV color space for better color segmentation
    hsv = cv.cvtColor(image, cv.COLOR_BGR2HSV)

    # Create a binary mask based on the requested color range
    mask = cv.inRange(hsv, np.array(lower_color), np.array(upper_color))

    # Use morphological operations to reduce noise
    kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (5, 5))
    mask = cv.morphologyEx(mask, cv.MORPH_CLOSE, kernel)  # Close small holes
    mask = cv.morphologyEx(mask, cv.MORPH_OPEN, kernel)   # Remove small noise

    # Find contours in the binary mask
    contours, _ = cv.findContours(mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

    circles = []
    for contour in contours:
        # Approximate the contour with a polygon; shapes with few vertices
        # (triangles, rectangles) are rejected as non-circular.
        perimeter = cv.arcLength(contour, True)
        approx = cv.approxPolyDP(contour, 0.02 * perimeter, True)
        if len(approx) < min_vertices:
            continue

        (x, y), radius = cv.minEnclosingCircle(contour)
        circles.append((int(x), int(y), int(radius)))

    return circles

# Main Function to capture images and run detection & segmentation every 3 seconds
def capture_and_process(interval_ms=3000):
    """Grab camera frames in a loop, outline detected circles, and display them.

    For every frame the image is uncompressed, undistorted, resized to
    640x480 and searched for circles with ``detect_circles_by_color``. Each
    circle is then refined with SAM (prompted with the circle's bounding box),
    intersected with the filled circle, and its outline is drawn in green.

    Args:
        interval_ms: Time in milliseconds to wait for a key press between
            frames. Defaults to 3000 (one frame every 3 seconds).

    The loop ends when 'q' is pressed while the display window has focus.
    OpenCV windows are closed on exit, including when an error is raised.
    """
    # Camera calibration (intrinsics and distortion coefficients) is fetched
    # once instead of on every frame.
    camera_info = vision.get_camera_intrinsics()

    try:
        while True:
            # Fetch the compressed image from the Niryo camera and decode it
            img_compressed = vision.get_img_compressed()
            img_uncompressed = pyniryo.uncompress_image(img_compressed)

            # Correct the image for camera distortion
            img = pyniryo.undistort_image(img_uncompressed, camera_info.intrinsics, camera_info.distortion)

            # Resize the frame for processing
            resized_frame = cv.resize(img, (640, 480))
            frame_height, frame_width = resized_frame.shape[:2]

            # Detect circles in the resized frame using color thresholding
            circles = detect_circles_by_color(resized_frame)

            if circles:
                # Compute the SAM image embedding once per frame, before any
                # contour is drawn on it; this is the expensive step and does
                # not depend on the individual circle.
                predictor.set_image(cv.cvtColor(resized_frame, cv.COLOR_BGR2RGB))

                for x, y, r in circles:
                    if r <= 0:
                        continue  # degenerate circle: nothing to prompt SAM with

                    # Filled mask of the detected circle
                    circle_mask = np.zeros((frame_height, frame_width), dtype=np.uint8)
                    cv.circle(circle_mask, (x, y), r, 255, thickness=-1)

                    # Prompt SAM with the circle's bounding box (clipped to
                    # the frame) so the mask refers to this circle; without
                    # any prompt every circle would get the same mask.
                    box = np.array([
                        max(x - r, 0),
                        max(y - r, 0),
                        min(x + r, frame_width - 1),
                        min(y + r, frame_height - 1),
                    ])
                    masks, scores, _ = predictor.predict(box=box)

                    # Use the best mask (highest score) as a 0/255 image
                    best_mask = masks[np.argmax(scores)].astype(np.uint8) * 255

                    # Keep only the part of the SAM mask inside the circle
                    combined_mask = cv.bitwise_and(best_mask, circle_mask)

                    # Draw the outline of the combined mask in green
                    contours, _ = cv.findContours(combined_mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
                    cv.drawContours(resized_frame, contours, -1, (0, 255, 0), 2)

            # Display the results
            cv.imshow("Segmented Circles", resized_frame)

            # Wait before capturing the next frame; press 'q' to quit
            if cv.waitKey(interval_ms) & 0xFF == ord('q'):
                break
    finally:
        cv.destroyAllWindows()

# Run the main function to capture and process frames
# Start the capture loop; capture_and_process() only returns once its loop
# is left (it breaks when 'q' is pressed).
if __name__ == "__main__":
    capture_and_process()

# Close any OpenCV windows that are still open.
cv.destroyAllWindows()
