<a href="https://colab.research.google.com/github/kapilgarg/Robotics/blob/main/YOLO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! pip install ultralytics


Collecting ultralytics
  Downloading ultralytics-8.3.235-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.235-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.235 ultralytics-thop-2.0.18


In [3]:
from ultralytics import YOLO
import cv2

# Load the YOLOv8 nano checkpoint into the shared module-level `model`.
# NOTE(review): the following cell repeats these imports and this load, so
# this cell looks redundant — consider removing one of the two.
model = YOLO('yolov8n.pt')

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ━━━━━━━━━━━━ 6.2MB 23.3MB/s 0.3s


In [8]:
from ultralytics import YOLO
import cv2

# Load the pre-trained detector once; every detect_* helper below reads this
# module-level `model`. The weights file is downloaded automatically if it is
# not already present.
model = YOLO('yolov8n.pt')
# Other checkpoint sizes: yolov8n.pt (nano), yolov8s.pt (small),
# yolov8m.pt (medium), yolov8l.pt (large), yolov8x.pt (xlarge)

# ===== METHOD 1: Single Image =====
def detect_image(image_path):
    """Run the detector on a single image and print each detection.

    Args:
        image_path: Image source passed straight to ``model(...)``.

    For every detected box, two lines are printed: the class name with its
    confidence (two decimals) and the box corners formatted without decimals.
    Nothing is returned.
    """
    results = model(image_path)

    for result in results:
        for box in result.boxes:
            # Resolve the class index to its human-readable label.
            label = model.names[int(box.cls[0])]
            score = float(box.conf[0])

            # First row of xyxy carries this box's two corner points.
            left, top, right, bottom = box.xyxy[0].cpu().numpy()

            print(f"Detected: {label}, Confidence: {score:.2f}")
            print(f"Box: ({left:.0f}, {top:.0f}), ({right:.0f}, {bottom:.0f})")

    # Optional: display the annotated image in an OpenCV window.
    # annotated_frame = results[0].plot()
    # cv2.imshow('YOLOv8 Detection', annotated_frame)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

# ===== METHOD 2: Webcam/Video Stream =====
def detect_webcam(camera_index=0):
    """Run detection on a live camera feed and show annotated frames.

    Args:
        camera_index: Device index passed to ``cv2.VideoCapture``; the
            default ``0`` selects the default webcam.

    Frames are read until the stream ends, a read fails, or the user presses
    'q' in the display window. If the camera cannot be opened, a message is
    printed and the function returns without processing anything.
    """
    cap = cv2.VideoCapture(camera_index)

    # try/finally guarantees the capture handle and any windows are released
    # even if inference or display raises part-way through the stream.
    try:
        if not cap.isOpened():
            print(f"Could not open camera {camera_index}")
            return

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # verbose=False suppresses the per-frame log line.
            results = model(frame, verbose=False)

            # Draw the detections onto a copy of the frame and display it.
            annotated_frame = results[0].plot()
            cv2.imshow('YOLOv8 Webcam', annotated_frame)

            # Press 'q' to quit; the mask keeps only the low byte of the key code.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

# ===== METHOD 3: Video File =====
def detect_video(video_path):
    """Run detection on every frame of a video file and show annotated frames.

    Args:
        video_path: Path (or other source) passed to ``cv2.VideoCapture``.

    Frames are read until the video ends, a read fails, or the user presses
    'q' in the display window. If the video cannot be opened, a message is
    printed and the function returns without processing anything.
    """
    cap = cv2.VideoCapture(video_path)

    # try/finally guarantees the capture handle and any windows are released
    # even if inference or display raises part-way through the video.
    try:
        if not cap.isOpened():
            print(f"Could not open video {video_path}")
            return

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # verbose=False matches detect_webcam and avoids one log line per frame.
            results = model(frame, verbose=False)
            annotated_frame = results[0].plot()

            cv2.imshow('YOLOv8 Video', annotated_frame)

            # Press 'q' to quit; the mask keeps only the low byte of the key code.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

# ===== METHOD 4: With Action Based on Detection =====
def detect_and_act(image_path, conf_threshold=0.5):
    """Detect objects in an image and trigger an action per recognised class.

    Args:
        image_path: Image source passed straight to ``model(...)``.
        conf_threshold: Value forwarded as the ``conf`` argument of the model
            call (confidence threshold). Defaults to 0.5.

    A message is printed for each detected 'person', 'bottle' or 'cup';
    detections of any other class are ignored. The marked spots are where
    robot-specific behaviour is meant to be plugged in.
    """
    results = model(image_path, conf=conf_threshold)

    for result in results:
        for box in result.boxes:
            class_name = model.names[int(box.cls[0])]

            # Take action based on the detected object.
            if class_name == 'person':
                print("Person detected! Taking action...")
                # Your robot action here
            elif class_name == 'bottle':
                print("Bottle detected! Picking up...")
                # Your robot action here
            elif class_name == 'cup':
                print("Cup detected!")
                # Your robot action here

# ===== METHOD 5: Save Results =====
def detect_and_save(image_path, output_path):
    """Run detection on an image and write the annotated result to disk.

    Args:
        image_path: Image source passed straight to ``model(...)``.
        output_path: Destination file path handed to ``cv2.imwrite``.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the image was not written.
    """
    results = model(image_path)

    # Only the first result is rendered and saved.
    annotated_frame = results[0].plot()

    # cv2.imwrite signals failure through its return value rather than by
    # raising, so check it before claiming success.
    if not cv2.imwrite(output_path, annotated_frame):
        raise OSError(f"Could not write annotated image to {output_path}")

    print(f"Saved result to {output_path}")

# ===== METHOD 6: Get Detailed Information =====
def detect_detailed(image_path):
    """Run detection on an image and print a detailed report per object.

    Args:
        image_path: Image source passed straight to ``model(...)``.

    For each result the number of detected objects is printed, followed by
    one section per object with its class, confidence (as a percentage),
    bounding-box corners, box size and box centre. Nothing is returned.
    """
    results = model(image_path)

    for result in results:
        boxes = result.boxes
        print(f"Found {len(boxes)} objects")

        for i, box in enumerate(boxes):
            # xyxy holds the corner coordinates in pixels (the normalized
            # 0-1 variant is xyxyn), so no rescaling is needed here.
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()

            # Truncate to whole pixels for integer arithmetic and printing.
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

            # Box dimensions
            width = x2 - x1
            height = y2 - y1

            # Center point (integer division keeps it on the pixel grid)
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2

            conf = float(box.conf[0])
            cls = int(box.cls[0])
            class_name = model.names[cls]

            print(f"\nObject {i+1}:")
            print(f"  Class: {class_name}")
            print(f"  Confidence: {conf:.2%}")
            print(f"  Bounding Box: ({x1}, {y1}) to ({x2}, {y2})")
            print(f"  Size: {width}x{height} pixels")
            print(f"  Center: ({center_x}, {center_y})")

# ===== Main Execution =====
if __name__ == "__main__":
    # Entry point: detect_webcam() is the active demo. To try another one,
    # comment it out and uncomment the call you want to run.

    # detect_image('/content/workspace (4).webp')
    detect_webcam()
    # detect_video('path/to/video.mp4')
    # detect_and_act('path/to/image.jpg')
    # detect_and_save('input.jpg', 'output.jpg')
    # detect_detailed('path/to/image.jpg')