# ECSE 415: Final Project
Mathieu Geoffroy, 260986559
Ryan Reszetnik, 260948454


December 5th, 2023

In [5]:
import numpy as np
import cv2
import os
from ultralytics import YOLO
import torch
from collections import defaultdict

# Base directory for the input videos: the process's current directory (".").
working_dir = os.curdir

In [6]:
# Open one capture handle per driving video, resolved against working_dir.
video_mcgill, video_stcat = (
    cv2.VideoCapture(os.path.join(working_dir, filename))
    for filename in ('mcgill_drive.mp4', 'st-catherines_drive.mp4')
)

In [7]:
# Pick the inference device in order of preference: CUDA, then Apple MPS,
# then fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Load the YOLOv8 nano weights and move the model to the chosen device.
model = YOLO('yolov8n.pt').to(device)

# Label lookup exposed by the model; it is indexed below with the integer
# class of each detection and compared against names such as 'person'.
yolo_labels = model.names

In [8]:
# Count the distinct people and cars seen in a video by running YOLOv8
# tracking on every frame and counting each track ID the first time it shows.
video_path = "mcgill_drive.mp4"
cap = cv2.VideoCapture(video_path)

# Recent centre points per track ID. An ID being present as a key also marks
# it as "already counted", so entries are trimmed but never removed.
track_history = defaultdict(list)

# Maximum number of centre points kept (and drawn) per track.
MAX_TRACK_POINTS = 30

people_count = 0
car_count = 0

try:
    # Loop through the video frames
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            # End of the video (or a read failure): stop processing.
            break

        # Run YOLOv8 tracking on the frame, persisting tracks between frames
        results = model.track(frame, persist=True)
        detections = results[0].boxes

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # `boxes.id` is None when nothing in the frame carries a track ID;
        # treat that as an empty frame instead of crashing on `.int()`.
        if detections.id is not None:
            boxes = detections.xywh.cpu()
            track_ids = detections.id.int().cpu().tolist()
            classes = detections.cls.int().cpu().tolist()
        else:
            boxes, track_ids, classes = [], [], []

        # Count every track ID once, on the first frame it appears in. This
        # must run before the history update below inserts the ID as a key.
        for cls, track_id in zip(classes, track_ids):
            if track_id in track_history:
                continue
            label = yolo_labels[cls]
            if label == 'person':
                people_count += 1
            elif label == 'car':
                car_count += 1

        # Display the running totals
        cv2.putText(annotated_frame, f"Number of people: {people_count}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(annotated_frame, f"Number of cars: {car_count}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Plot the tracks
        for box, track_id in zip(boxes, track_ids):
            x, y, w, h = box
            track = track_history[track_id]
            track.append((float(x), float(y)))  # x, y center point
            if len(track) > MAX_TRACK_POINTS:  # keep only the newest points
                track.pop(0)

            # Draw the tracking lines
            points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
            cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=10)

        # Display the annotated frame
        cv2.imshow("YOLOv8 Tracking", annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the capture and close the display window, even if
    # tracking raised part-way through the video.
    cap.release()
    cv2.destroyAllWindows()

0: 384x640 3 cars, 15.8ms
Speed: 2.3ms preprocess, 15.8ms inference, 7.5ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 3 cars, 14.4ms
Speed: 2.0ms preprocess, 14.4ms inference, 14.4ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 5 cars, 14.0ms
Speed: 2.8ms preprocess, 14.0ms inference, 13.6ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 5 cars, 13.8ms
Speed: 2.2ms preprocess, 13.8ms inference, 13.0ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 5 cars, 14.1ms
Speed: 2.3ms preprocess, 14.1ms inference, 14.2ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 5 cars, 13.2ms
Speed: 2.2ms preprocess, 13.2ms inference, 15.3ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 5 cars, 13.1ms
Speed: 1.8ms preprocess, 13.1ms inference, 15.2ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 5 cars, 13.8ms
Speed: 2.4ms preprocess, 13.8ms inference, 14.4ms postprocess per image at shape (1, 3, 384, 640)
0: 384x64