In [None]:
import cv2 # python opencv for image processing
from ultralytics import YOLO # yolo v8 for object detection
from deep_sort_realtime.deepsort_tracker import DeepSort # deepsort for object tracking
import numpy as np
import sys
import colorsys
import os
import datetime

In [None]:
# Run-mode selection. Supported invocations:
#   1. camera
#   2. video 'input_filename'
#   3. benchmark 'test_case_name'
# Every value has a default, so this cell also runs when no (or unrelated)
# command-line arguments are present -- e.g. inside a notebook kernel, where
# sys.argv may hold the kernel's own launch arguments instead of ours.
arguments = sys.argv[1:]

for arg in arguments:
    print(arg)

# Defaults; the override cells further down may replace any of them.
mode = 'camera'
input_filename = 'input.mp4'
test_case_name = 'CarScale'

# Only accept a recognised mode. Anything else falls back to 'camera', which is
# also how the main loop treats an unknown mode.
if arguments and arguments[0] in ('camera', 'video', 'benchmark'):
    mode = arguments[0]
    # The second argument is optional; keep the default when it is missing.
    if mode == 'video' and len(arguments) > 1:
        input_filename = arguments[1]
    elif mode == 'benchmark' and len(arguments) > 1:
        test_case_name = arguments[1]

# Image directory of the selected benchmark test case.
current_path = os.getcwd()
test_case_path = current_path + '/benchmark/' + test_case_name + '/img'

In [None]:
# Manual override: select benchmark mode on the 'CarScale' test case.
# NOTE(review): the next two cells also assign `mode`; when the notebook is run
# top to bottom, the last assignment executed is the one in effect.
mode = 'benchmark'
test_case_name = 'CarScale'

In [None]:
# Manual override: select camera mode (the main loop opens capture device 0 for
# any mode other than 'video' or 'benchmark').
mode = 'camera'

In [None]:
# Manual override: select video mode. The main loop reads the file from
# current_path + '/video/' + input_filename.
mode = 'video'
input_filename = 'input.mp4'

In [None]:
def VideoWriter(video_reader, output_filename, default_fps=30.0):
    """Create a cv2.VideoWriter whose frame size and rate match `video_reader`.

    Args:
        video_reader: capture object exposing `.get(prop)` (a cv2.VideoCapture).
        output_filename: path of the video file to create; its parent
            directory is created if it does not exist yet.
        default_fps: frame rate to use when the reader does not report a
            positive one.

    Returns:
        The cv2.VideoWriter writing to `output_filename`.
    """
    frame_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Keep fractional rates (e.g. 29.97) instead of truncating them, and fall
    # back to a usable rate when the source reports none: `get` returns 0 for
    # properties the capture backend does not support. `not fps > 0` also
    # catches NaN.
    fps = float(video_reader.get(cv2.CAP_PROP_FPS))
    if not fps > 0:
        fps = float(default_fps)

    # Lower-case tag: the MP4 container's tag for this codec is 'mp4v'.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')

    # Make sure the target directory exists before the writer opens the file.
    output_dir = os.path.dirname(output_filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    return cv2.VideoWriter(output_filename, fourcc, fps, (frame_width, frame_height))

In [None]:
# Main pipeline: read frames, detect objects with YOLO, track them with
# DeepSort, draw the tracks, and store both the annotated video and a text log.
detector = YOLO("yolov8n.pt")
# detector.train(data='coco.yaml', epochs=100, imgsz=640) # Train the model
tracker = DeepSort(max_age=30)

color_size = 20 # palette size; track ids are mapped onto it modulo color_size
colors = [list(np.random.random(size=3) * 256) for _ in range(color_size)] # one random colour per palette slot
MIN_CONFIDENCE = 0.6 # detections below this confidence are dropped

current_path = os.getcwd()

# Both outputs go here; create the directory so a fresh checkout works.
output_dir = current_path + '/output'
os.makedirs(output_dir, exist_ok=True)

if mode == 'video':
  video_reader = cv2.VideoCapture(current_path + '/video/' + input_filename)
elif mode == 'benchmark':
  # Only needed in benchmark mode, so the other modes do not depend on
  # test_case_name being defined.
  test_case_path = current_path + '/benchmark/' + test_case_name + '/img'
  video_reader = cv2.VideoCapture(test_case_path + "/%04d.jpg", cv2.CAP_IMAGES)
else:
  video_reader = cv2.VideoCapture(0)

video_writer = None
try:
  # Fail loudly instead of silently producing empty output files.
  if not video_reader.isOpened():
    raise RuntimeError(f"could not open the input source for mode '{mode}'")

  video_writer = VideoWriter(video_reader, output_dir + "/output.mp4")

  frame_counter = 0
  with open(output_dir + '/output.txt', 'w') as file:
    while True:
      start_time = datetime.datetime.now()

      ### --- video reading: video -> frame --- ###
      retval, frame = video_reader.read()
      if not retval: break

      ### --- detecting: frame -> list of ([left,top,w,h], confidence, detection_class) --- ###
      detections = detector(frame)[0].boxes.data.tolist()
      results = []

      # Each row is read as [left, top, right, bottom, confidence, class];
      # convert it to the ([left,top,w,h], confidence, class) tuples passed to the tracker.
      for detection in detections:
        confidence = float(detection[4])
        if confidence < MIN_CONFIDENCE: continue

        left, top, right, bottom = (int(value) for value in detection[:4])
        detection_class = int(detection[5])
        results.append(([left, top, right - left, bottom - top], confidence, detection_class))

      ### --- tracking: list of ([left,top,w,h], confidence, detection_class), frame -> tracks (i.e. track_id, ltrb) --- ###
      tracks = tracker.update_tracks(results, frame=frame)
      for track in tracks:
        if not track.is_confirmed(): continue

        track_id = int(track.track_id)
        xmin, ymin, xmax, ymax = (int(value) for value in track.to_ltrb()[:4])
        color = colors[track_id % color_size]

        # draw the bounding box, then a filled label box holding the track id
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
        cv2.rectangle(frame, (xmin - 1, ymin - 30), (xmin + 31, ymin), color, -1)
        cv2.putText(frame, str(track_id), (xmin + 5, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

        # log line: frame, track id, left, top, width, height
        file.write(f"{frame_counter}  {track_id}  {xmin}  {ymin}  {xmax - xmin}  {ymax - ymin}\n")

      ### --- output: display and store output frame with tracking result --- ###
      end_time = datetime.datetime.now()
      # Clamp the elapsed time so a zero-length measurement cannot divide by zero.
      elapsed = max((end_time - start_time).total_seconds(), 1e-6)
      fps_text = f"FPS: {1 / elapsed:.2f}"
      cv2.putText(frame, fps_text, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 5)
      cv2.imshow("Output", frame)

      video_writer.write(frame)

      ### --- controller: quit using `q` button --- ###
      if cv2.waitKey(1) == ord("q"): break

      frame_counter += 1
finally:
  # release all components, even when the loop above raised
  video_reader.release()
  if video_writer is not None:
    video_writer.release()

  cv2.waitKey(1)
  cv2.destroyAllWindows()
  cv2.waitKey(1)

In [None]:
# Standalone cleanup cell: closes any OpenCV windows still open, e.g. after the
# main cell was interrupted.
# NOTE(review): the waitKey(1) calls around destroyAllWindows presumably give
# the GUI event loop a chance to process the close request -- confirm.
cv2.waitKey(1)
cv2.destroyAllWindows()
cv2.waitKey(1)