In [None]:
import cv2
from matplotlib import pyplot as plt
from ultralytics import YOLO
from dataclasses import dataclass
import numpy as np
import os
from IPython.display import Video, display, Image
import imageio

from tracker_yolo.TrackObject import TrackedObject, Detection
from tracker_yolo.utils import create_tracker, bbox_center
from tracker_yolo.utils import iou, estimate_speed_mean, overlaps_existing_track
from tracker_yolo.utils import associate_detections
from tracker_yolo.plotting import draw_kalman_prediction




# Load the YOLO weights once; the cells below reuse `model` for every prediction call.
model = YOLO(model="models/yolo26s.pt")

In [None]:
# Environment check: OpenCV version and whether this build exposes the CSRT tracker factory.
print(cv2.__version__)
print(hasattr(cv2, "TrackerCSRT_create"))

# Sample image to run the detector on (alternatives kept for quick switching).
# image_path = "Inputs/bridge_cars.jpg"
# image_path = "Inputs/car_behind_rails.jpg"
image_path = "Inputs/car_stop.jpg"

# cv2.imread does not raise on a missing/unreadable file, it returns None.
# Fail here with a clear message instead of handing None to the model.
im1 = cv2.imread(image_path)
if im1 is None:
    raise FileNotFoundError(f"Could not read image at path: {image_path}")

results = model.predict(source=im1, save=True, show=False)  # save plotted images

In [None]:
# Shape of the loaded image array.
print(f"Image 1 dimensions: {im1.shape}")


# One line each: type of the prediction container, how many entries it holds,
# and the attributes available on the first entry.
print(type(results), len(results), dir(results[0]), sep="\n")
# print(results[0])

In [None]:
# Dump the detections of the first result between two separator lines.
rule = "-" * 38
print(rule)
print("boxes")

detected = results[0].boxes
# print(detected)
print(detected.cls)        # class index of every box
print(dir(detected))
print(detected.xyxy)       # Positions of the bounding boxes in xyxy format
# print(detected.xywh)     # Same boxes in xywh format (center x, center y, width, height)
# print(results[0].names)

# Translate the class indices into their readable names.
label_of = results[0].names
print([label_of[int(class_idx)] for class_idx in detected.cls])
print(rule)

In [None]:
# Total number of elements in the image array.
print(im1.size)

# Plot only the image and its boxes using matplotlib.
# cv2.imread delivers channels in BGR order while matplotlib interprets them as RGB,
# so convert first; otherwise red and blue are swapped in the preview.
fig, ax = plt.subplots()
ax.imshow(cv2.cvtColor(im1, cv2.COLOR_BGR2RGB))

for box in results[0].boxes:
    # .tolist() yields plain Python floats (and copies off the GPU if needed),
    # which is what matplotlib expects for patch geometry.
    x1, y1, x2, y2 = box.xyxy[0].tolist()
    rect = plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, color='red', linewidth=2)
    ax.add_patch(rect)
    # Class label at the top-left corner of the box.
    cls = int(box.cls[0])
    ax.text(x1, y1, results[0].names[cls], color='white', fontsize=12, backgroundcolor='red')

ax.axis('off')  # Hide axis
plt.show()

In [None]:
input_video_path = "Inputs/static_cam_cars.mp4"
if not os.path.exists(input_video_path):
    # `raise` needs an exception instance; raising a bare string is itself a TypeError.
    raise FileNotFoundError(f"Input video not found at path: {input_video_path}")


# Per-video output directory: Outputs/<video file name without extension>.
out_dir = os.path.splitext(os.path.basename(input_video_path))[0]
out_dir = os.path.join("Outputs/", out_dir)
os.makedirs(out_dir, exist_ok=True)
# TODO: Finish the [out_dir] usage!
print(f'Output dir: [{out_dir}]')

# Decode the whole video once to count the frames that can actually be read.
cap = cv2.VideoCapture(input_video_path)
try:
    # Read the FPS while the capture is still open.
    fps = cap.get(cv2.CAP_PROP_FPS)

    frame_iter = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Count only frames that were really decoded (the failed final read is not a frame).
        frame_iter += 1
        if frame_iter % 30 == 0:
            print(f"Frame {frame_iter}")
finally:
    # The tracking cells open their own capture, so this one can be closed right away.
    cap.release()


# Print total number of frames
print(f"Total number of frames: {frame_iter}")
# Print the rate of frames per second
print(f"Frames per second: {fps}")
if fps > 0:
    print(f"Duration of the video: {frame_iter / fps:.2f} seconds")
else:
    # Avoid a ZeroDivisionError when no usable frame rate is reported.
    print("Duration of the video: unknown (no FPS reported)")


In [None]:
# Open the input video and prepare both outputs (MP4 + GIF) and the tracker state
# consumed by the tracking loop.
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

fps = cap.get(cv2.CAP_PROP_FPS)
if fps <= 0:
    # fps is used as a divisor below and is passed on to the tracks; a clear error
    # is more useful than a ZeroDivisionError.
    cap.release()
    raise ValueError(f"Input video reports an invalid FPS ({fps}): {input_video_path}")
width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Get length of the video (frame count as reported by the container)
length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

print(f"Total number of frames: {length}. ")
video_duration = length / fps
print(f"Video duration (s): {video_duration:.2f}. ")
print(f'Video FPS: {fps:.2f}, Width: {width}, Height: {height}')

# Both output files live directly under Outputs/; make sure it exists.
os.makedirs("Outputs", exist_ok=True)

# Codec choice. The lower-case 'mp4v' tag is the one that matches the .mp4 container;
# NOTE(review): with upper-case 'MP4V' OpenCV's FFmpeg backend is expected to warn and
# fall back to 'mp4v' - confirm on the target build.
# Other options that were tried:
# fourcc = cv2.VideoWriter_fourcc(*"MJPG")
# fourcc = cv2.VideoWriter_fourcc(*"XVID")
# fourcc = 0x31637661  # 'avc1' in little endian
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
writer = cv2.VideoWriter(
    "Outputs/tracked_output.mp4",
    fourcc,
    fps,
    (width, height),
)
if not writer.isOpened():
    # Keep going (the GIF can still be produced) but make the problem visible now
    # rather than after the whole run.
    print("WARNING: video writer could not be opened; Outputs/tracked_output.mp4 will not be written")


gif_writer = imageio.get_writer(
    "Outputs/tracked_output.gif",
    mode="I",
    fps=5,          # lower FPS - better visible what's going on
    loop=0,   # 0 = infinite loop
)

# Fresh tracker state for this run.
tracked_objects = []
# TrackedObject._next_id = 0
TrackedObject.reset_ids()
# For FPS=30
MAX_MISSED = 30 # Max missed frames before deleting track
YOLO_INTERVAL = 15 # Run YOLO every N frames

In [None]:
# Main tracking loop.
# Per frame: predict every track's Kalman filter, then either run YOLO and associate its
# detections with the tracks (every YOLO_INTERVAL-th frame) or update the tracks from
# their trackers only. The frame is then annotated and written to the MP4 / GIF outputs.

# Early stop for testing: index of the last frame to process (inclusive).
# Other values used while experimenting: 60, 180.
MAX_FRAME_IDX = 120
# Number of fastest tracks that get a speed label.
speed_n_entries = 7

frame_idx = 0

# Sample the writer state while it is still open: after release() isOpened() is
# always False, which would make the final report meaningless.
writer_was_opened = writer.isOpened()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        use_yolo = frame_idx % YOLO_INTERVAL == 0

        # Advance the Kalman filter of every track before any measurement is applied.
        for obj in tracked_objects:
            obj.predict_kf()

        if use_yolo:
            # Kept under its own name so the notebook-level `results` list from the
            # single-image cells is not overwritten by a single result object.
            frame_results = model.predict(frame, conf=0.3, verbose=False)[0]

            # Collect only detections whose class name is "car".
            detections = []
            if frame_results.boxes is not None:
                for box, cls, conf in zip(
                    frame_results.boxes.xyxy.cpu().numpy(),
                    frame_results.boxes.cls.cpu().numpy(),
                    frame_results.boxes.conf.cpu().numpy(),
                ):
                    found_object_class = frame_results.names[int(cls)]
                    if found_object_class == "car":
                        detections.append(
                            Detection(bbox=box, conf=conf, cls=int(cls))
                        )
                    # else:
                        # print(f'Found object of class: [{found_object_class}]')

            # --- association ---
            # Runs on every YOLO frame, even with an empty detection list, so that
            # `unmatched_dets` is always defined for the drawing code below and
            # tracks are still marked missed / pruned.
            matches, unmatched_tracks, unmatched_dets = associate_detections(
                tracked_objects, detections
            )

            # update matched tracks
            for t_idx, d_idx in matches:
                tracked_objects[t_idx].update(detections[d_idx])

            # mark missed tracks
            for t_idx in unmatched_tracks:
                tracked_objects[t_idx].mark_missed()

            # create new tracks
            for d_idx in unmatched_dets:
                det = detections[d_idx]
                # With condition to avoid duplicates
                if not overlaps_existing_track(det, tracked_objects):
                    tracked_objects.append(TrackedObject(det, fps, frame))

            # remove dead tracks
            tracked_objects = [
                t for t in tracked_objects if t.missed <= MAX_MISSED
            ]

            # --- draw all detections (red; OpenCV colors are BGR) ---
            for det in detections:
                x1, y1, x2, y2 = map(int, det.bbox)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

            # --- draw unmatched detections on top (blue in BGR) ---
            for d_idx in unmatched_dets:
                det = detections[d_idx]
                x1, y1, x2, y2 = map(int, det.bbox)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

        else:

            # Tracker-only update
            for track in tracked_objects:
                track.update_from_tracker(frame)

            # No detections: mark all as missed
            # NOTE(review): every non-YOLO frame counts as a miss, so a track collects
            # YOLO_INTERVAL - 1 misses between detector frames unless update() resets
            # the counter - confirm MAX_MISSED is chosen with that in mind.
            for track in tracked_objects:
                track.mark_missed()

            # remove dead tracks
            tracked_objects = [
                t for t in tracked_objects if t.missed <= MAX_MISSED
            ]

        if frame_idx % 30 == 0:
            # Debug aid: pairwise IOU between all tracked objects, highest first.
            # The result is only consumed by the commented-out print below.
            iou_values = []
            for i in range(len(tracked_objects)):
                for j in range(i + 1, len(tracked_objects)):
                    iou_score = iou(tracked_objects[i].bbox, tracked_objects[j].bbox)
                    iou_values.append((tracked_objects[i].id, tracked_objects[j].id, iou_score))
            iou_values.sort(key=lambda x: x[2], reverse=True)
            # print(f'Frame {frame_idx}: Top IOU values between tracked objects:')
            # for id1, id2, score in iou_values[:5]:  # Print top 5 IOU values
            #     print(f'  IDs {id1} & {id2}: IOU = {score:.4f}')

        # Status line in the top-left corner: frame index, elapsed time, number of
        # active tracks and whether this was a YOLO frame or a tracker-only frame.
        info_text = f'Frame {frame_idx}; Duration {frame_idx/fps:.2f}s; Active Tracks: {len(tracked_objects)}; ' + \
                    ('[YOLO]' if use_yolo else '[Trac]')
        cv2.putText(
            frame,
            info_text,
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 255, 255),
            2,
        )

        # --- draw tracks (green box + ID label) ---
        for obj in tracked_objects:
            x1, y1, x2, y2 = map(int, obj.bbox)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(
                frame,
                f"ID {obj.id}",
                (x1, y1 - 5),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.6,
                (0, 255, 0),
                2,
            )

        # Update timeticks for all tracked objects and collect their speed estimates.
        # The track itself is stored so the label can be placed without a second lookup.
        speed_list = []
        for obj in tracked_objects:
            obj.update_timeticks(frame_idx, fps)
            # obj_speed = estimate_speed(obj)
            speed_list.append((obj, estimate_speed_mean(obj)))

        # Label the `speed_n_entries` fastest tracks; tracks without an estimate are skipped.
        measured = [(obj, speed) for obj, speed in speed_list if speed is not None]
        measured.sort(key=lambda item: item[1], reverse=True)
        for obj, track_speed in measured[:speed_n_entries]:
            x1, y1, x2, y2 = map(int, obj.bbox)
            cv2.putText(
                frame,
                f"{track_speed*3.6:.1f} km/h",  # m/s -> km/h
                (x1, y2 + 20),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.6,
                (255, 255, 0),
                2,
            )

        for obj in tracked_objects:
            draw_kalman_prediction(frame, obj, steps=30, color=(255, 0, 255))
            # if obj.id in [9]:
            #     print(f'KF info on obj [{obj.id}] at frame [{frame_idx}]')
            #     print(f'position: {float(obj.kf.position[0]):.1f}, {float(obj.kf.position[1]):.1f}')
            #     print(f'velocity: {float(obj.kf.velocity[0]):.1f}, {float(obj.kf.velocity[1]):.1f}')

        # The GIF gets every 4th frame; imageio expects RGB while the frame is BGR.
        if frame_idx % 4 == 0:
            gif_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            gif_writer.append_data(gif_frame)

        # Write before the early-stop check so the last processed frame ends up in
        # the video as well as in the GIF.
        writer.write(frame)

        # Early stop for testing
        if frame_idx >= MAX_FRAME_IDX:
            break

        frame_idx += 1
finally:
    # Always finalize the outputs, even if a frame raised, so the files are not
    # left open or truncated.
    cap.release()
    writer.release()
    gif_writer.close()

print(f"Writer opened: {writer_was_opened}")


In [None]:
# Show the annotated GIF inline in the notebook.
gif_path = "Outputs/tracked_output.gif"
display(Image(gif_path, embed=True))


In [None]:
# display(Video("Inputs/video.mp4", embed=True))
