In [None]:
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

home_dir = "C:/Users/Alex/Desktop/blackboxML"
train_loc = "train1"
run = "test"

image_loc = home_dir + "images/see_cam/full/" + run
model_loc = "datasets/" + train_loc + "/weights/best.pt"
model = YOLO(model_loc)

im0 = cv2.imread(image_loc + "")

# Create an annotator object to draw on the frame
annotator = Annotator(im0, line_width=2)

# Perform object tracking on the current frame
results = model.track(im0, persist=True)

# Check if tracking IDs and masks are present in the results
if results[0].boxes.id is not None and results[0].masks is not None:
    # Extract masks and tracking IDs
    masks = results[0].masks.xy
    track_ids = results[0].boxes.id.int().cpu().tolist()

    # Annotate each mask with its corresponding tracking ID and color
    for mask, track_id in zip(masks, track_ids):
        annotator.seg_bbox(mask=mask, mask_color=colors(track_id, True), track_label=str(track_id))

# Display the annotated frame
cv2.imshow("instance-segmentation-object-tracking", im0)

# Make a function which saves annotated images to a video file - https://github.com/ChristineDewi/ultralytics-YOLOv8-Hand-Detection/blob/main/examples/object_tracking.ipynb


In [4]:
from collections import defaultdict
import cv2
import csv
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

# Dictionary to store tracking history with default empty lists
track_history = defaultdict(lambda: [])

# Load the YOLO model with segmentation capabilities
home_dir = "C:/Users/Alex/Desktop/blackboxML/"
train_loc = "train1"
run = "test"

image_loc = f"{home_dir}images/see_cam/full/"
model_loc = f"datasets/{train_loc}/weights/best.pt"
model = YOLO(model_loc)

# Open the video file
cap = cv2.VideoCapture(f"{image_loc}IMG_6103.mov")

# Retrieve video properties: width, height, and frames per second
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

# Initialize video writer to save the output video with the specified properties
out = cv2.VideoWriter("wtf.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))

# Open the CSV file to write the coordinates
csv_file = open("coordinates.csv", mode="w", newline="")
csv_writer = csv.writer(csv_file)
csv_writer.writerow(["Frame", "Tracking ID", "X Coordinate", "Y Coordinate"])  # Writing the header row

while True:
    # Read a frame from the video
    ret, im0 = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    # Create an annotator object to draw on the frame
    annotator = Annotator(im0, line_width=2)

    # Perform object tracking on the current frame
    results = model.track(im0, persist=True)

    # Check if tracking IDs and masks are present in the results
    if results[0].boxes.id is not None and results[0].masks is not None:
        # Extract masks and tracking IDs
        masks = results[0].masks.xy
        track_ids = results[0].boxes.id.int().cpu().tolist()

        # Debugging: Print track IDs and masks to check if they are being processed
        print(f"Track IDs: {track_ids}")
        print(f"Masks: {masks}")

        # Annotate each mask with its corresponding tracking ID and color
        for mask, track_id in zip(masks, track_ids):
            annotator.seg_bbox(mask=mask, mask_color=colors(track_id, True), track_label=str(track_id))

            # Calculate the bounding box (min and max x and y values) from the mask
            mask_points = mask.reshape((-1, 2))  # Flatten mask to a list of points
            x_min, y_min = mask_points.min(axis=0)
            x_max, y_max = mask_points.max(axis=0)

            # Draw a bounding box on the image
            cv2.rectangle(im0, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (255, 0, 0), 2)

            # Calculate the centroid of the mask (to represent the hand's center)
            centroid = mask_points.mean(axis=0)  # Calculate the center of the mask
            centroid = tuple(centroid.astype(int))

            # Debugging: Print centroid coordinates to ensure they are calculated correctly
            print(f"Centroid for Track ID {track_id}: {centroid}")

            # Display the centroid coordinates on the frame
            cv2.putText(im0, f"ID: {track_id} ({centroid[0]}, {centroid[1]})", 
                        (centroid[0] + 10, centroid[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 
                        0.5, (255, 0, 0), 2)

            # Optionally, you can draw a circle at the centroid position
            cv2.circle(im0, centroid, 5, (0, 0, 255), -1)

            # Save the coordinates to the CSV file
            csv_writer.writerow([int(cap.get(cv2.CAP_PROP_POS_FRAMES)), track_id, centroid[0], centroid[1]])

    # Write the annotated frame to the output video
    out.write(im0)
    # Display the annotated frame
    cv2.imshow("instance-segmentation-object-tracking", im0)

    # Exit the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

# Release the video writer and capture objects, and close all OpenCV windows
out.release()
cap.release()
csv_file.close()
cv2.destroyAllWindows()



0: 384x640 1 hand, 26.1ms
Speed: 4.1ms preprocess, 26.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 hand, 78.4ms
Speed: 5.2ms preprocess, 78.4ms inference, 10.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 hand, 66.5ms
Speed: 1.0ms preprocess, 66.5ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 hand, 17.7ms
Speed: 3.0ms preprocess, 17.7ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 hand, 7.0ms
Speed: 1.0ms preprocess, 7.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 hand, 8.0ms
Speed: 1.1ms preprocess, 8.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 hand, 8.0ms
Speed: 3.0ms preprocess, 8.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 hand, 10.1ms
Speed: 2.0ms preprocess, 10.1ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1

In [None]:
from collections import defaultdict
import cv2
import csv
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

# Dictionary to store tracking history with default empty lists
track_history = defaultdict(lambda: [])

# Load the YOLO model with segmentation capabilities
home_dir = "C:/Users/Alex/Desktop/blackboxML/"
train_loc = "train1"
run = "test"

image_loc = f"{home_dir}images/see_cam/full/"
model_loc = f"datasets/{train_loc}/weights/best.pt"
model = YOLO(model_loc)

# Open the video file
cap = cv2.VideoCapture(f"{image_loc}shortenedflightnov.mp4")

# Retrieve video properties: width, height, and frames per second
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

# Initialize video writer to save the output video with the specified properties
out = cv2.VideoWriter("wtf.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))

# Open the CSV file to write the coordinates
csv_file = open("coordinates.csv", mode="w", newline="")
csv_writer = csv.writer(csv_file)
csv_writer.writerow(["Frame", "Tracking ID", "X Coordinate", "Y Coordinate"])  # Writing the header row

while True:
    # Read a frame from the video
    ret, im0 = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    # Create an annotator object to draw on the frame
    annotator = Annotator(im0, line_width=2)

    # Perform object tracking on the current frame
    results = model.track(im0, persist=True)

    # Debugging: Print the results object to see available attributes
    print(f"Results: {results}")

    # Check if bounding boxes are present and if tracking IDs are available
    if results[0].boxes is not None and results[0].boxes.id is not None:
        # Extract bounding boxes and track IDs
        boxes = results[0].boxes.xyxy  # Format: [x_min, y_min, x_max, y_max]
        track_ids = results[0].boxes.id.int().cpu().tolist()

        # Debugging: Print bounding boxes and track IDs to check if they are being processed
        print(f"Track IDs: {track_ids}")
        print(f"Boxes: {boxes}")

        # Annotate each bounding box with its corresponding tracking ID and color
        for box, track_id in zip(boxes, track_ids):
            x_min, y_min, x_max, y_max = box  # Extract coordinates from the bounding box

            # Draw a bounding box on the image
            cv2.rectangle(im0, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (255, 0, 0), 2)

            # Calculate the centroid of the bounding box (center of the hand)
            centroid = ((x_min + x_max) // 2, (y_min + y_max) // 2)

            # Debugging: Print centroid coordinates to ensure they are calculated correctly
            print(f"Centroid for Track ID {track_id}: {centroid}")

            # Display the centroid coordinates on the frame
            cv2.putText(im0, f"ID: {track_id} ({int(centroid[0])}, {int(centroid[1])})", 
                        (int(centroid[0]) + 10, int(centroid[1]) - 10), 
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

            # Optionally, you can draw a circle at the centroid position
            cv2.circle(im0, (int(centroid[0]), int(centroid[1])), 5, (0, 0, 255), -1)

            # Save the coordinates to the CSV file
            csv_writer.writerow([int(cap.get(cv2.CAP_PROP_POS_FRAMES)), track_id, int(centroid[0]), int(centroid[1])])

    else:
        print("No objects or tracking IDs detected in this frame.")

    # Write the annotated frame to the output video
    out.write(im0)
    # Display the annotated frame
    cv2.imshow("instance-segmentation-object-tracking", im0)

    # Exit the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

# Release the video writer and capture objects, and close all OpenCV windows
out.release()
cap.release()
csv_file.close()
cv2.destroyAllWindows()



0: 384x640 1 hand, 26.0ms
Speed: 6.7ms preprocess, 26.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
Results: [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: ultralytics.engine.results.Keypoints object
masks: None
names: {0: 'hand'}
obb: None
orig_img: array([[[120,  98,  61],
        [144, 122,  85],
        [187, 144, 101],
        ...,
        [ 93,  91,  87],
        [ 93,  91,  87],
        [ 93,  91,  87]],

       [[104,  82,  45],
        [132, 110,  73],
        [185, 142,  99],
        ...,
        [ 93,  91,  87],
        [ 93,  91,  87],
        [ 93,  91,  87]],

       [[ 90,  68,  31],
        [118,  96,  59],
        [171, 128,  85],
        ...,
        [ 93,  91,  87],
        [ 93,  91,  87],
        [ 93,  91,  87]],

       ...,

       [[188, 103,  44],
        [188, 103,  44],
        [188, 103,  44],
        ...,
        [ 37,  66,  94],
        [ 37,  66,  94],
       