Task 3: single-object tracking challenge. The task aims to estimate the state of a target, indicated in the first frame, in the subsequent video frames.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import imageio.v2 as io
import cv2
import torch
import os
import sys
from utils.utils_funcitons import *
device= "cuda" if torch.cuda.is_available() else "cpu"
from ultralytics import YOLO

In [2]:
# Project paths, all relative to the notebook's working directory.
# Built with os.path.join so the same cell works on Windows and POSIX systems
# (on Windows the resulting strings are identical to the previous raw-string
# literals such as r'.\dataset\Task3_Single_Object_Tracking').
dataset_path = os.path.join('.', 'dataset', 'Task3_Single_Object_Tracking')
sequences_folder = os.path.join(dataset_path, 'sequences')      # one sub-folder of frames per sequence
annotations_folder = os.path.join(dataset_path, 'annotations')  # annotation files for the sequences

# Weights loaded by YOLO(model_path) in the cells below.
model_path = os.path.join('.', 'models', 'best_yolov8n.pt')
# Alternative weights file:
#model_path = os.path.join('.', 'models', 'yolov8s.pt')

### YOLOv8 Object Tracker

In [49]:
# Track one object through an image sequence and write an annotated video.
# For every readable frame the tracker is run, the box whose track id equals
# `obj_id_to_track` (if present) is drawn, and the frame is resized and
# appended to the output video.
model = YOLO(model_path)

# Images folder path (enable exactly one line to choose the sequence)
#img_folder = os.path.join(sequences_folder, 'uav0000053_00264_s') #people
img_folder = os.path.join(sequences_folder, 'uav0000317_00000_s') #motor
#img_folder = os.path.join(sequences_folder, 'uav0000115_00606_s') #car
img_names = sorted(os.listdir(img_folder))

# Video size: every frame is resized to this before being written
width, height = 1280, 720

# Codec and VideoWriter
# NOTE(review): the 'XVID' fourcc is paired with an .mp4 container; depending on
# the OpenCV backend this may trigger a codec fallback warning - confirm.
codec = cv2.VideoWriter_fourcc(*'XVID')
fps = 30
output_path = os.path.join('.', 'outputs', 'Task3', 'output_yolo.mp4')
# cv2.VideoWriter does not create missing directories; make sure the target exists.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
output_video = cv2.VideoWriter(output_path, codec, fps, (width, height))

# Tracker configuration file passed through to track_objects
tracker_config = os.path.join('.', 'botsort_custom.yaml')

# ID to track in order to match with annotations folder files
obj_id_to_track = 2

# Frame processing
try:
    if not output_video.isOpened():
        raise RuntimeError(f"Could not open video writer for {output_path}")

    for img_name in img_names:
        frame = cv2.imread(os.path.join(img_folder, img_name))
        if frame is None:
            # cv2.imread returns None for files it cannot decode; skip them.
            continue

        # track_objects is provided by the utils star import.
        # NOTE(review): assumed to return a sequence whose first element exposes
        # .cls, .id and .xyxy tensors (as used below) - confirm against utils.
        result_boxes = track_objects(model, frame, objects=['motor'], conf_thresh=0.3, iou=0.5, imgsz=1280, tracker=tracker_config)
        boxes = result_boxes[0]

        # Detection control: only annotate when there are detections with track ids;
        # otherwise the original frame is written unchanged.
        if len(boxes.cls) > 0 and boxes.id is not None:
            # Rows whose track id equals the requested one (computed once per frame).
            matches = torch.nonzero(boxes.id == obj_id_to_track)
            if matches.nelement() != 0:
                row = int(matches[0, 0])
                bboxes_xyxy = boxes.xyxy.detach().cpu().numpy()
                x1, y1, x2, y2 = (int(v) for v in bboxes_xyxy[row][:4])
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255,255,255), 2)
                cv2.putText(frame, f"Motor-{obj_id_to_track}", (x1 + 10, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 2)

        # The writer only accepts frames of the size it was created with.
        frame = cv2.resize(frame, (width, height))
        output_video.write(frame)
finally:
    # Release resources even if a frame fails, so the video file is finalized.
    output_video.release()
    cv2.destroyAllWindows()

### YOLOv8: Track All Objects of a Class

In [43]:
# Track every object of the selected classes through an image sequence and
# write the frames, annotated by results[0].plot(), to a video file.
model = YOLO(model_path)
objects = ['car']

# Translate the requested class names into the numeric ids passed to model.track.
# `classes` is expected to come from the utils star import.
# NOTE(review): presumably the ordered list of class names the model was trained on - confirm.
# A comprehension is used so no loop variable (previously named `object`, which
# rebound the builtin for the whole kernel) leaks into the notebook namespace.
if objects is None:
    number_class_list = []
elif objects == ['all']:
    number_class_list = list(range(len(classes)))
else:
    number_class_list = [classes.index(class_name) for class_name in objects]

# Images folder path (enable exactly one line to choose the sequence)
#img_folder = os.path.join(sequences_folder, 'uav0000317_00000_s') #motor
#img_folder = os.path.join(sequences_folder, 'uav0000053_00264_s') #people
img_folder = os.path.join(sequences_folder, 'uav0000115_00606_s') #car

img_names = sorted(os.listdir(img_folder))

# Video size: every annotated frame is resized to this before being written
width, height = 1280, 720

# Codec and VideoWriter
# NOTE(review): this is the same output file the single-object tracking cell
# writes to, so running both cells overwrites the earlier video - confirm intent.
codec = cv2.VideoWriter_fourcc(*'XVID')
fps = 30
output_path = os.path.join('.', 'outputs', 'Task3', 'output_yolo.mp4')
# cv2.VideoWriter does not create missing directories; make sure the target exists.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
output_video = cv2.VideoWriter(output_path, codec, fps, (width, height))

# Tracker configuration file passed to model.track
tracker_config = os.path.join('.', 'botsort_custom.yaml')

# Frame processing
try:
    if not output_video.isOpened():
        raise RuntimeError(f"Could not open video writer for {output_path}")

    for img_name in img_names:
        frame = cv2.imread(os.path.join(img_folder, img_name))
        if frame is None:
            # cv2.imread returns None for files it cannot decode; skip them.
            continue

        # persist=True keeps tracker state between successive calls on this model.
        results = model.track(frame, persist=True, verbose=False, conf=0.3, iou=0.5, classes=number_class_list, imgsz=704, tracker=tracker_config)
        annotated_frame = results[0].plot()
        # The writer only accepts frames of the size it was created with.
        annotated_frame = cv2.resize(annotated_frame, (width, height))
        output_video.write(annotated_frame)
finally:
    # Release resources even if a frame fails, so the video file is finalized.
    output_video.release()
    cv2.destroyAllWindows()

### Video Processing

In [6]:
# Track every object of the selected classes in a video file, show each
# annotated frame in a window, and write the annotated frames to a new video.
model = YOLO(model_path)
objects = ['car']

# Translate the requested class names into the numeric ids passed to model.track.
# `classes` is expected to come from the utils star import.
# NOTE(review): presumably the ordered list of class names the model was trained on - confirm.
# A comprehension is used so no loop variable (previously named `object`, which
# rebound the builtin for the whole kernel) leaks into the notebook namespace.
if objects is None:
    number_class_list = []
elif objects == ['all']:
    number_class_list = list(range(len(classes)))
else:
    number_class_list = [classes.index(class_name) for class_name in objects]

# Open the video file
video_path = "path/to/video.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this message the loop below is skipped silently.
    print(f"Could not open video: {video_path}")

# Video size: every annotated frame is resized to this before being written
width, height = 1280, 720

# Codec and VideoWriter
codec = cv2.VideoWriter_fourcc(*'XVID')
# Keep the source frame rate so playback speed is preserved; fall back to 30
# when the capture does not report a usable value.
source_fps = cap.get(cv2.CAP_PROP_FPS)
fps = source_fps if source_fps and source_fps > 0 else 30
output_path = os.path.join('.', 'outputs', 'Task3', 'output.mp4')
# cv2.VideoWriter does not create missing directories; make sure the target exists.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
output_video = cv2.VideoWriter(output_path, codec, fps, (width, height))

# Frame processing
try:
    if not output_video.isOpened():
        raise RuntimeError(f"Could not open video writer for {output_path}")

    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            # Break the loop if the end of the video is reached
            break

        # persist=True keeps tracker state between successive calls on this model.
        results = model.track(frame, persist=True, verbose=False, conf=0.3, iou=0.5, classes=number_class_list, imgsz=704)
        annotated_frame = results[0].plot()
        # The writer only accepts frames of the size it was created with.
        annotated_frame = cv2.resize(annotated_frame, (width, height))
        output_video.write(annotated_frame)

        # Display the annotated frame
        cv2.imshow("YOLOv8 Tracking", annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release resources even if a frame fails, so the video file is finalized.
    cap.release()
    output_video.release()
    cv2.destroyAllWindows()