# Exercise 3.1

Welcome to Exercise 3.1. In this exercise, we will learn how to use the DeepSORT algorithm with the YOLOv5 model to track objects in video.


## Instructions

Below are detailed instructions to help you understand the process of applying DeepSORT and YOLO to video.


### Libraries

In [21]:
import numpy as np
import torch
import cv2
import math
import time
from PIL import Image
from deep_sort_realtime.deepsort_tracker import DeepSort
import pathlib
import os

# Presumably a workaround for loading a checkpoint that was saved on a POSIX
# system (it would contain pickled PosixPath objects) — confirm against the
# actual weights file. Aliasing PosixPath to WindowsPath lets such objects be
# rebuilt on Windows. The alias is applied on Windows only: on any other
# platform WindowsPath cannot be instantiated, so patching there would break
# every later use of PosixPath.
temp = pathlib.PosixPath  # original class, kept so the patch can be undone
if os.name == "nt":
    pathlib.PosixPath = pathlib.WindowsPath

### Load Detection Model

Solution to exercise 1

In [None]:
# Load the custom-trained YOLOv5 weights through torch.hub.
# force_reload=True asks torch.hub for a fresh copy of the repository rather
# than a cached one, which avoids parameter conflicts when loading new models.
model = torch.hub.load(
    'ultralytics/yolov5',
    'custom',
    path=r"[...]\best.pt",
    force_reload=True,
)

### Initialize DeepSORT

Solution to exercise 2

In [4]:
# Create the DeepSORT tracker that is updated once per video frame.
# Parameter notes below paraphrase the deep_sort_realtime documentation —
# verify against the installed version.
object_tracker = DeepSort(
    max_age=3,                # missed frames tolerated before a track is dropped
    n_init=2,                 # frames a new track stays in its initialization phase
    nms_max_overlap=1.0,      # NMS overlap threshold (1.0 reportedly disables NMS)
    max_cosine_distance=0.3,  # gating threshold on appearance (cosine) distance
)

### Input processing functions for the DeepSORT algorithm

Solution to exercise 3

In [23]:
def score_frame(frame):
    """Run the detector on one frame and split its output.

    Calls the module-level ``model`` on ``frame`` and takes the first entry of
    the result's ``xyxyn`` list (the detections for this single image).

    Returns:
        A ``(labels, cord)`` pair: ``labels`` is the last column of the
        detection table, ``cord`` is every remaining column (the normalized
        box corners followed by the confidence score).
    """
    detections = model(frame).xyxyn[0]
    class_column = detections[:, -1]
    box_columns = detections[:, :-1]
    return class_column, box_columns

# Lookup table from class index to class name, taken from the loaded model.
classes = model.names


def class_to_label(x):
    """Return the class name for the numeric class value ``x``.

    ``x`` is converted with ``int()`` before the lookup, so tensor scalars and
    floats are accepted.
    """
    class_index = int(x)
    return classes[class_index]

def plot_boxes(results, height, width, confidence=0.3):
    """Turn normalized detections into the tuples passed to the tracker.

    Args:
        results: ``(labels, cord)`` pair as produced by ``score_frame``. Each
            row of ``cord`` holds the normalized ``x1, y1, x2, y2`` corners
            followed by the confidence score at index 4.
        height: Frame height in pixels, used to scale the y coordinates.
        width: Frame width in pixels, used to scale the x coordinates.
        confidence: Minimum score a row needs in order to be kept.

    Returns:
        A list of ``([left, top, box_width, box_height], score, class_name)``
        tuples in pixel units, one per kept detection.
    """
    labels, cord = results
    kept = []

    for idx, label in enumerate(labels):
        box = cord[idx]
        score = box[4]
        # Written as "not >=" so the comparison is exactly the original one.
        if not score >= confidence:
            continue

        # Scale the normalized corners up to pixel coordinates.
        left = int(box[0] * width)
        top = int(box[1] * height)
        right = int(box[2] * width)
        bottom = int(box[3] * height)

        score_value = float(score.item())
        class_name = class_to_label(label)
        kept.append(([left, top, right - left, bottom - top], score_value, class_name))

    return kept

### Apply DeepSORT and YOLO to Video

Solution to exercise 4

In [27]:
import cv2

input_video_path = [...] # Path to input video
output_video_path = [...]  # Path to output video

# Open input video
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    cap.release()
    # exit() is a helper meant for the interactive shell; raising SystemExit
    # works everywhere and reports a non-zero exit status.
    raise SystemExit("Error: Could not open video.")

out = None
try:
    # Get video parameters. FPS is kept as a float: truncating e.g. 29.97 to
    # 29 would make the output play at a different speed than the input.
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Create video writer to save output video
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
    if not out.isOpened():
        raise SystemExit("Error: Could not open output video for writing.")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Predict objects in the frame. OpenCV delivers BGR frames, while the
        # YOLOv5 hub model expects RGB for array input, so the channel order
        # is reversed for detection only.
        results = score_frame(frame[..., ::-1])
        detections = plot_boxes(results=results, height=frame.shape[0], width=frame.shape[1], confidence=0.5)
        # The tracker receives the original frame, unchanged from the source.
        tracks = object_tracker.update_tracks(detections, frame=frame)

        # Draw frames and track IDs onto the video
        for track in tracks:
            # Skip tentative tracks so unconfirmed detections are not drawn.
            if not track.is_confirmed():
                continue
            bbox = track.to_tlbr()  # Bounding box as (x1, y1, x2, y2)
            track_id = track.track_id  # ID tracking
            x1, y1, x2, y2 = map(int, bbox)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"ID: {track_id}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Record frames to output video
        out.write(frame)
finally:
    # Release the capture and writer even if processing fails part-way, so
    # the output file is finalized and the input handle is not leaked.
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()
