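Set the paths of the input video, the output video, and the directory of tracking labels. Mount Google Drive first if any of the paths point to it.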

In [None]:
# Source video; opened with cv2.VideoCapture in a later cell.
input_video_path = "/content/drive/MyDrive/input.mp4"
# Destination of the annotated video written with cv2.VideoWriter.
output_video_path = "output.mp4"
# Directory holding one label file per frame, looked up as "<frame number>.txt"
# with frame numbers starting at 1.
tracking_labels_dir = "/content/drive/MyDrive/labels"

This cell only needs to be executed once.

In [None]:
import glob
import itertools
import cv2
import numpy as np
from tqdm import tqdm
from decimal import Decimal, ROUND_HALF_UP


def yolo_bbox2img_coordinate(list_, width=None, height=None):
    """Convert a YOLO-style box into absolute corner coordinates.

    Args:
        list_: Four values ``(cx, cy, w, h)`` -- box centre and box size --
            expressed as fractions of the frame size. Each value may be a
            string or a number; it is converted with ``float()``.
        width: Frame width in pixels. When omitted, the notebook-level
            ``video_width`` is used, so that variable must already be defined.
        height: Frame height in pixels. When omitted, the notebook-level
            ``video_height`` is used.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` as floats, in pixels.
    """
    # Fall back to the notebook globals only when the caller did not pass a
    # size, which keeps existing one-argument calls working unchanged.
    if width is None:
        width = video_width
    if height is None:
        height = video_height

    cx, cy, w, h = (float(value) for value in list_)
    cx *= width
    cy *= height
    w *= width
    h *= height
    return [cx - w / 2., cy - h / 2., cx + w / 2., cy + h / 2.]


def my_round(n):
    """Round ``n`` to the nearest integer, with ties rounded away from zero.

    The value is routed through ``Decimal(str(n))`` and quantized with
    ``ROUND_HALF_UP`` instead of using the built-in ``round``.

    Args:
        n: A number whose ``str()`` form is accepted by ``Decimal``.

    Returns:
        The rounded value as an ``int``.
    """
    as_decimal = Decimal(str(n))
    whole_units = Decimal("1")
    rounded = as_decimal.quantize(whole_units, rounding=ROUND_HALF_UP)
    return int(rounded)


def my_round_list(l):
    """Apply ``my_round`` to every element of ``l``.

    Args:
        l: An iterable of numbers.

    Returns:
        A new list with each element rounded by ``my_round``, in input order.
    """
    rounded = []
    for value in l:
        rounded.append(my_round(value))
    return rounded


def calc_center(bbox):
    """Return the rounded centre point of a corner-format box.

    Args:
        bbox: A sequence whose first four items are
            ``x_min, y_min, x_max, y_max``.

    Returns:
        ``[center_x, center_y]`` rounded with ``my_round_list``.
    """
    x_min, y_min = bbox[0], bbox[1]
    x_max, y_max = bbox[2], bbox[3]
    center = [(x_min + x_max) / 2., (y_min + y_max) / 2.]
    return my_round_list(center)

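Re-run this cell every time before re-running the last cell: the last cell reads the input video to the end and releases both the reader and the writer, so they must be re-created here.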

In [None]:
# Open the source video and fail fast if it cannot be read; otherwise the
# properties below come back as 0 and the later cells silently do nothing.
invideo = cv2.VideoCapture(input_video_path)
if not invideo.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

codec = cv2.VideoWriter_fourcc(*'mp4v')

# Properties of the source video, reused by the following cells.
video_fps = invideo.get(cv2.CAP_PROP_FPS)
video_width = int(invideo.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(invideo.get(cv2.CAP_PROP_FRAME_HEIGHT))
video_num_frm = int(invideo.get(cv2.CAP_PROP_FRAME_COUNT))

# The output is 250 px wider than the input to make room for the legend panel
# that the visualization cell appends to the right of every frame.
outvideo = cv2.VideoWriter(output_video_path, codec, video_fps,
                           (video_width + 250, video_height))
if not outvideo.isOpened():
    # Do not leave the reader open when the writer could not be created.
    invideo.release()
    raise IOError(f"Could not open output video for writing: {output_video_path}")

Read tracking labels.

In [None]:
# Parse the per-frame tracking label files.
# Each non-blank line is read as whitespace-separated fields:
#   field 0     -> class (0 is counted as rotifer, anything else as fertilized)
#   fields 1-4  -> cx, cy, w, h passed to yolo_bbox2img_coordinate
#   last field  -> track ID
pd_tracks = {}  # key: frame index (0-based), value: {track ID: rounded [x1, y1, x2, y2]}
pd_fer = []  # Rotifer IDs predicted as "fertilized" (class 1) at least once
rot_ids = []  # per frame: track IDs of class-0 detections
fer_ids = []  # per frame: track IDs of all other detections

for i_frm in range(video_num_frm):
    pd_tracks[i_frm] = {}
    tmp_rot_ids = []
    tmp_fer_ids = []

    # Note! This path should be set appropriately for your situation.
    # Label files are numbered from 1, frame indices from 0.
    path = f"{tracking_labels_dir}/{str(i_frm+1)}.txt"

    with open(path, "r") as fr:
        lines = fr.readlines()

    for line in lines:
        # split() without an argument tolerates tabs and repeated spaces, and
        # yields an empty list for blank lines, which are skipped instead of
        # crashing on the unpacking below.
        fields = line.split()
        if not fields:
            continue

        bbox = yolo_bbox2img_coordinate(fields[1:5])
        class_ = int(fields[0])
        id_ = int(fields[-1])

        if class_ == 0:
            tmp_rot_ids.append(id_)
        else:
            tmp_fer_ids.append(id_)

        # Store if it's predicted as "fertilized" (1) even once.
        if class_ == 1 and id_ not in pd_fer:
            pd_fer.append(id_)

        # Keep only the first box when an ID appears twice in one frame.
        if id_ not in pd_tracks[i_frm]:
            pd_tracks[i_frm][id_] = my_round_list(bbox)

    rot_ids.append(tmp_rot_ids)
    fer_ids.append(tmp_fer_ids)

# IDs seen as class 0 in some frame and never predicted as "fertilized".
fer_lookup = set(pd_fer)
pd_rot = [t for t in set(itertools.chain.from_iterable(rot_ids))
          if t not in fer_lookup]

Output visualization video.

In [None]:
# You can change as you like.
colors = [(0, 255, 0), (0, 0, 255), (255, 0, 0)]  # rotifer, fertilized, text/legend
win_size = video_fps * 3  # length of the drawn trajectory, in frames (3 seconds)
font_scale = 1.0
font_thickness = 2

# Width of the black legend panel appended to the right of every frame.
# It must equal the extra width given to the VideoWriter (video_width + 250).
panel_width = 250
# The legend is positioned relative to the left edge of the panel so that it
# stays inside the panel for any input resolution, not only 1920x1080.
panel_x = video_width


def _put_label(img, text, org, color):
    """Draw ``text`` on ``img`` at ``org`` with the notebook's font settings."""
    cv2.putText(img, text, org, cv2.FONT_HERSHEY_DUPLEX, font_scale, color,
                font_thickness, cv2.LINE_AA)


try:
    for i_frm in tqdm(range(video_num_frm)):
        ret, frm = invideo.read()
        # Check the read result before touching the frame.
        assert ret, f"Failed to read frame {i_frm} of {video_num_frm}"
        original_frm = np.copy(frm)

        color_deno = min(win_size, i_frm)

        # Walk backwards through at most win_size frames and connect the box
        # centres of every track that exists in two consecutive frames.
        for tmp_i_frm in range(i_frm, -1, -1):
            if i_frm - tmp_i_frm >= win_size:
                break

            # Frame 0 has no predecessor to connect to.
            if tmp_i_frm == 0:
                continue

            if i_frm > win_size:
                color_nume = tmp_i_frm - (i_frm - win_size)
            else:
                color_nume = tmp_i_frm

            # Offset value of color; it depends only on the age of the
            # segment, so it is computed once per frame rather than per track.
            offset = my_round(color_nume * 255. / color_deno)
            color = [255 - offset, offset, offset]

            bboxes1 = pd_tracks[tmp_i_frm]
            bboxes2 = pd_tracks[tmp_i_frm - 1]

            for id_, bbox in bboxes1.items():
                if id_ in bboxes2:
                    cv2.line(frm, tuple(calc_center(bbox)),
                             tuple(calc_center(bboxes2[id_])),
                             color, 6, cv2.LINE_AA)

        # Boxes of the current frame, coloured by whether the track was ever
        # predicted as "fertilized".
        for id_, bbox in pd_tracks[i_frm].items():
            box_color = colors[1] if id_ in pd_fer else colors[0]
            cv2.rectangle(frm, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                          box_color, 4, cv2.LINE_4)

        # Blend the drawing with the untouched frame, then append the panel.
        blended = cv2.addWeighted(frm, 0.7, original_frm, 0.3, 0)
        panel = np.zeros((video_height, panel_width, 3), dtype=np.uint8)
        blended = np.hstack((blended, panel))

        # Legend: colour samples.
        cv2.line(blended, (panel_x + 30, 130), (panel_x + 230, 130),
                 colors[0], 5, cv2.LINE_4)
        cv2.line(blended, (panel_x + 30, 180), (panel_x + 230, 180),
                 colors[1], 5, cv2.LINE_4)
        for i in range(20):
            shade = my_round(255. - i * 255. / 19)
            cv2.line(blended,
                     (panel_x + 230 - (i + 1) * 10, 230),
                     (panel_x + 230 - i * 10, 230),
                     (255 - shade, shade, shade), 10, cv2.LINE_4)

        # Legend: captions.
        _put_label(blended, "Rotifer", (panel_x + 10, 120), colors[0])
        _put_label(blended, "Fertilized", (panel_x + 10, 170), colors[1])
        _put_label(blended, "Trajectory", (panel_x + 10, 220), colors[2])

        # Counts for the current frame: total, rotifer, fertilized.
        n_rot = len(rot_ids[i_frm])
        n_fer = len(fer_ids[i_frm])
        _put_label(blended, str(n_rot + n_fer), (panel_x + 10, 30), colors[2])
        _put_label(blended, str(n_rot), (panel_x + 95, 30), colors[0])
        _put_label(blended, str(n_fer), (panel_x + 180, 30), colors[1])

        # Counts of distinct track IDs over the whole video.
        _put_label(blended, str(len(pd_rot) + len(pd_fer)), (panel_x + 10, 70),
                   colors[2])
        _put_label(blended, str(len(pd_rot)), (panel_x + 95, 70), colors[0])
        _put_label(blended, str(len(pd_fer)), (panel_x + 180, 70), colors[1])

        outvideo.write(blended)
finally:
    # Release both handles even when a frame fails, so the output file is
    # closed instead of being left open by the writer.
    invideo.release()
    outvideo.release()