<a href="https://colab.research.google.com/github/ritwikraha/computer-needs-glasses/blob/master/image-object-tracking/Object_Segmented_Pose_Tracking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Object Detection, Segmentation and Pose Estimation with YOLOv8

## Setup and Installation

In [1]:
!pip install -q ultralytics
!pip install -q opencv-python-headless
!pip install -q moviepy

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m778.8/778.8 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m70.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import moviepy.editor as mpy
from google.colab import files

In [None]:
from ultralytics import SAM, YOLO
import numpy as np
import cv2

## Upload your video

In [3]:
# Prompt for a video file through Colab's upload widget.
uploaded = files.upload()

# The result is indexed by filename below; if it is empty (presumably when the
# dialog is cancelled -- confirm), fail with a clear message instead of an
# opaque IndexError.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and select a video file.")

# Use the first uploaded file as the input video path.
video_path = next(iter(uploaded.keys()))

Saving Mike Tyson Mitt Work.mp4 to Mike Tyson Mitt Work.mp4


## Load Models

In [25]:
# Object detector used to find the person bounding box in each frame.
# "yolov8n.pt" is the checkpoint loaded here; any other YOLOv8 checkpoint can be substituted.
detection_model = YOLO("yolov8n.pt")  # You can use any YOLOv8 model
# Segment Anything model; later prompted with the detected bounding box to get a person mask.
sam_model = SAM("sam_b.pt")
# YOLOv8 pose model, run on the masked (segmented) frame to estimate keypoints.
pose_model = YOLO("yolov8n-pose.pt")

## Load the Video File

In [37]:
# Open the uploaded video for frame-by-frame reading.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

# Read the source geometry and frame rate so the output matches the input.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # Some containers do not report an FPS (0 or NaN); fall back to the
    # previous hard-coded default rather than producing an unplayable file.
    fps = 20.0

# Writer for the annotated output video (XVID codec in an AVI container).
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', fourcc, fps, (frame_width, frame_height))

## Loop for Only Pose Estimation

In [None]:
# while(cap.isOpened()):
#     ret, frame = cap.read()
#     if ret:
#         # Perform pose detection
#         results = pose_model(frame)

#         # Extract and draw poses on the frame
#         for result in results:
#             annotated_frame = result.plot()

#         # Write the frame into the output video
#         out.write(annotated_frame)
#     else:
#         break

# # Release the video capture and writer objects
# cap.release()
# out.release()

## Loop for Detection + Segmentation + Pose Detection

In [None]:
def _first_person_bbox(detection_results):
    """Return the first detected "person" box as ``[x1, y1, x2, y2]`` ints.

    Walks the detection results in order and stops at the first box whose
    class name is "person". Returns ``None`` when no person is found.

    Only the first person is used, so multi-person / multi-view footage is
    not handled correctly.
    TODO-Ritwik: implement an id-tracker
    """
    for result in detection_results:
        for bbox, cls_id in zip(result.boxes.xyxy, result.boxes.cls):
            if result.names[int(cls_id)] == "person":
                # Truncate the float coordinates to ints for the SAM box prompt.
                return [int(v) for v in bbox.cpu().numpy().tolist()]
    return None


# Per-frame pipeline: detect a person -> segment with SAM -> pose on the
# segmented frame -> write to the output video.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or read failure): stop processing.
            break

        # Detect bounding boxes and keep the first person, if any.
        person_bbox = _first_person_bbox(detection_model(frame))
        if person_bbox is None:
            # If no person is detected, write the original frame (optimise later)
            out.write(frame)
            continue

        # Segment the person using the SAM model prompted with the detected box.
        mask_results = sam_model(frame, bboxes=[person_bbox])
        masks = mask_results[0].masks if mask_results else None
        if masks is None or len(masks.data) == 0:
            # SAM produced no mask for this frame; keep the raw frame so the
            # output video stays in sync with the input.
            out.write(frame)
            continue

        # Extract the first mask and convert it to the uint8 form OpenCV expects.
        mask = masks.data[0].cpu().numpy().astype(np.uint8)

        # Apply the mask to the frame: pixels outside the mask become black.
        segmented_frame = cv2.bitwise_and(frame, frame, mask=mask)

        # Run pose detection on the segmented person.
        pose_results = pose_model(segmented_frame)

        # Draw the poses; fall back to the plain segmented frame if the pose
        # model returned nothing, so a stale frame is never written.
        if pose_results:
            annotated_frame = pose_results[0].plot()
        else:
            annotated_frame = segmented_frame

        # Write the frame into the output video.
        out.write(annotated_frame)
finally:
    # Always release the capture and writer, even if a frame fails, so the
    # output file is finalised and the input handle is closed.
    cap.release()
    out.release()

In [48]:
# Path of the processed video written by the processing loop.
output_video_path = 'output.avi'

# Convert the AVI to MP4 so it can be played back in Colab.
# (This cell only converts the file; it does not render a player.)
clip = mpy.VideoFileClip(output_video_path)
try:
    clip.write_videofile("output-detected.mp4")
finally:
    # Close the clip so the underlying reader is released even if writing fails.
    clip.close()

Moviepy - Building video output-new.mp4.
Moviepy - Writing video output-new.mp4





Moviepy - Done !
Moviepy - video ready output-new.mp4
