# Load video and model
## Video:
- video_test: short videos, 3 folders (HTT, UTTQ, UTDD)
  - HTT: Duodenal ulcer -> use model htt.pt
  - UTTQ: Esophageal cancer -> use model thucquan.pt
  - UTDD: Gastric cancer -> use model daday.pt
- video_CS: IGH videos from Hoang Long Clinic
  - HTT: Duodenal ulcer -> use model htt.pt
  - UTTQ: Esophageal cancer -> use model thucquan.pt
  - UTDD: Gastric cancer -> use model daday.pt
- data_pk: 5 videos (inflammatory lesions) -> use either the 3 separate models or the 5-class model

## YOLOv8 Model
- 3 models and classes:
  - htt.pt: 7_Loet_HTT
  - thucquan.pt: 2_Viem_thuc_quan, 5_Ung_thu_thuc_quan
  - daday.pt: 3_Viem_da_day_HP_am, 4_Viem_da_day_HP_duong, 6_Ung_thu_da_day
- 1 model (5 classes): 5-class-model.pt
  - 0: 2_Viem_thuc_quan
  - 1: 3_Viem_da_day_HP_am
  - 2: 5_Ung_thu_thuc_quan
  - 3: 6_Ung_thu_da_day
  - 4: 7_Loet_HTT

## Re-ID Model
- OSNet (Market1501-based): osnet_x0_25_endocv_30.pt

## Usage
- Mount Drive + Load model
- Load Video data
- Install requirements
- Import libraries
- File define (edit `name = ...` whenever you switch to a different video)
- Class Color, StrongSORT
- Class ObjectDetection (simple use)
- RUN
- Generate txt csv results

The final results will be in folder "/content/runs"

# Mount Drive + Load model

In [None]:
# Mount Google Drive at /content/drive; the copy cells below read the model
# weights and the video folders from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Load ReID model and 3 detection models

In [None]:
# Copy the OSNet Re-ID weights plus the three per-organ YOLOv8 weights (daday, thucquan, htt) from Drive into /content.
!cp -r /content/drive/MyDrive/torchreid_model/osnet_x0_25_endocv_30.pt /content/drive/MyDrive/ENDOCV/model_pt/model_yolo/daday.pt /content/drive/MyDrive/ENDOCV/model_pt/model_yolo/thucquan.pt /content/drive/MyDrive/ENDOCV/model_pt/model_yolo/htt.pt /content

Load ReID model and 5-class model

In [None]:
# Copy the OSNet Re-ID weights plus the single 5-class YOLOv8 weights from Drive into /content.
!cp -r /content/drive/MyDrive/torchreid_model/osnet_x0_25_endocv_30.pt /content/drive/MyDrive/ENDOCV/model_pt/model_yolo/5-class-model.pt /content

# Load Video data (optional)

In [None]:
# Copy the data_pk video folder from Drive into /content (needed for section 1.3).
!cp -r /content/drive/MyDrive/data_pk /content

In [None]:
# Copy the video_CS folder from Drive into /content (needed for section 1.2).
!cp -r /content/drive/MyDrive/video_CS /content

In [None]:
# Copy the video_test folder of short clips from Drive into /content (needed for section 1.1).
!cp -r /content/drive/MyDrive/ENDOCV/video_test /content

# Install requirements

In [None]:
# Install PyTorch 2.3.0 (CUDA 12.1 wheels), then the detector (ultralytics) and tracker (boxmot) packages.
# NOTE(review): ultralytics and boxmot are not version-pinned, yet the StrongSORT cell further down
# imports boxmot internals (e.g. boxmot.appearance.reid_auto_backend) — consider pinning the
# versions this notebook was validated with.
!pip install torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121
!pip install ultralytics
!pip install boxmot

Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.3.0)
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m43.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.3.0)
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.3.0)
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m92.2 MB/s[0m eta [36m0:0

# Import lib

In [None]:
from ultralytics import YOLO
from pathlib import Path
from time import perf_counter, time
import cv2
import numpy as np
import torch
from boxmot import StrongSORT, BoTSORT, DeepOCSORT, OCSORT, HybridSORT
from pathlib import Path
import sys
from datetime import datetime, timedelta
import pandas as pd
from google.colab.patches import cv2_imshow
import os
import csv

Exception ignored on calling ctypes callback function: <function ThreadpoolController._find_libraries_with_dl_iterate_phdr.<locals>.match_library_callback at 0x7fb2354b35b0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/threadpoolctl.py", line 1005, in match_library_callback
    self._make_controller_from_path(filepath)
  File "/usr/local/lib/python3.10/dist-packages/threadpoolctl.py", line 1175, in _make_controller_from_path
    lib_controller = controller_class(
  File "/usr/local/lib/python3.10/dist-packages/threadpoolctl.py", line 114, in __init__
    self.dynlib = ctypes.CDLL(filepath, mode=_RTLD_NOLOAD)
  File "/usr/lib/python3.10/ctypes/__init__.py", line 374, in __init__
    self._handle = _dlopen(self._name, mode)
OSError: /usr/local/lib/python3.10/dist-packages/numpy.libs/libopenblas64_p-r0-5007b62f.3.23.dev.so: cannot open shared object file: No such file or directory


In [None]:
# Create the folder the final results are collected in (-p: no error if it already exists).
!mkdir -p /content/runs

# 1.1 File define (video_test)

In [None]:
def get_files(directory):
    """Return the base names (extension stripped) of the files in ``directory``.

    Only regular files are listed. Sub-folders (for example a hidden
    ``.ipynb_checkpoints`` directory) are skipped so they can never be
    mistaken for a video name. The result is sorted, because ``os.listdir``
    returns entries in arbitrary order and the listing is printed for the
    user to copy names from.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        list[str]: File names without their last extension, sorted.

    Raises:
        OSError: If ``directory`` does not exist or cannot be read.
    """
    files = []
    for filename in sorted(os.listdir(directory)):
        if not os.path.isfile(os.path.join(directory, filename)):
            continue  # ignore sub-directories
        # Keep the file name without its extension
        file_name_without_extension, _ = os.path.splitext(filename)
        files.append(file_name_without_extension)
    return files

# Category folders of the short test clips, always in the order UTDD, UTTQ, HTT.
directory_A, directory_B, directory_C = (
    "/content/video_test/UTDD/",
    "/content/video_test/UTTQ/",
    "/content/video_test/HTT/",
)

# One list of video names (no extension) per category folder.
files_A, files_B, files_C = (
    get_files(folder) for folder in (directory_A, directory_B, directory_C)
)

# Index 0 = UTDD, 1 = UTTQ, 2 = HTT; the selection cell below relies on this order.
vid_utdd_uttq_htt = [files_A, files_B, files_C]

print(vid_utdd_uttq_htt)


[['210518CS101', '230114CS2', '220324CS2', '210504CS205', '220702CS2'], ['220727CS201', '231103CS101', '231109CS101', '220318CS202', '231124CS101', '220922CS201'], []]


### video_test:
- UTDD: ['220702CS2_Trim2', '210518CS101_Trim2', '220324CS2_Trim2', '230320BVK020_Trim2', '230114CS2_Trim', '230114CS2_Trim2', '210518CS101_Trim', '230320BVK020_Trim', '220702CS2_Trim', '220324CS2_Trim']
- UTTQ: ['230411BVK107_Trim', '230411BVK106_Trim2', '220922CS201_Trim', '230411BVK004_Trim', '230411BVK004_Trim2', '220922CS201_Trim2', '230407BVK095_Trim2', '230411BVK104_Trim', '230411BVK106_Trim', '230407BVK095_Trim']
- HTT: ['Da day 211207 CS1 02_Trim']


In [None]:
# Copy and paste video name
# Copy and paste video name
name = '231103CS101'

# (category folder, YOLO weights) pairs, in the same order as the lists in
# vid_utdd_uttq_htt: UTDD, UTTQ, HTT.
video_sources = [
    ("/content/video_test/UTDD/", "/content/daday.pt"),
    ("/content/video_test/UTTQ/", "/content/thucquan.pt"),
    ("/content/video_test/HTT/", "/content/htt.pt"),
]

# Pick the folder/model of the category whose listing contains `name`.
for known_names, (video_dir, weights) in zip(vid_utdd_uttq_htt, video_sources):
    if name in known_names:
        test_vid = video_dir + name + ".mp4"
        model_weights = weights
        break
else:
    # Fail here instead of silently pairing an unknown name with the HTT model.
    raise ValueError(f"Video '{name}' was not found in any /content/video_test category folder")

# "<category>_<video name>", e.g. "UTTQ_231103CS101"; used to name the output video.
folder_name, file_name = test_vid.split("/")[-2:]
input_video_name = folder_name.split(".")[0] + '_' + file_name.split(".")[0]


# Display names per class index for each weights file. daday.pt has two
# inflammation classes that are shown under the same name.
model_classes_dict = {
    "/content/daday.pt": ['Viem da day', 'Viem da day' , 'Ung thu da day'],
    "/content/thucquan.pt": ['Viem thuc quan', 'Ung thu thuc quan'],
    "/content/htt.pt": ['Loet HTT']
}

# Look up the display names; weights missing from the dictionary fall back to
# ['polyp', 'esophagael cancer'].
model_classes = model_classes_dict.get(model_weights, ['polyp', 'esophagael cancer'])


print("Input Video Name:", input_video_name)
print("Model Classes:", model_classes)

Input Video Name: UTTQ_231103CS101
Model Classes: ['Viem thuc quan', 'Viem da day', 'Ung thu thuc quan', 'Ung thu da day', 'Loet HTT']


# 1.2 File define (video_CS)

In [None]:
def get_files(directory):
    """Return the base names (extension stripped) of the files in ``directory``.

    Only regular files are listed. Sub-folders (for example a hidden
    ``.ipynb_checkpoints`` directory) are skipped so they can never be
    mistaken for a video name. The result is sorted, because ``os.listdir``
    returns entries in arbitrary order and the listing is printed for the
    user to copy names from.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        list[str]: File names without their last extension, sorted.

    Raises:
        OSError: If ``directory`` does not exist or cannot be read.
    """
    files = []
    for filename in sorted(os.listdir(directory)):
        if not os.path.isfile(os.path.join(directory, filename)):
            continue  # ignore sub-directories
        # Keep the file name without its extension
        file_name_without_extension, _ = os.path.splitext(filename)
        files.append(file_name_without_extension)
    return files

# Category folders of the video_CS set, always in the order UTDD, UTTQ, HTT.
directory_A, directory_B, directory_C = (
    "/content/video_CS/UTDD/",
    "/content/video_CS/UTTQ/",
    "/content/video_CS/HTT/",
)

# One list of video names (no extension) per category folder.
files_A, files_B, files_C = (
    get_files(folder) for folder in (directory_A, directory_B, directory_C)
)

# Index 0 = UTDD, 1 = UTTQ, 2 = HTT; the selection cell below relies on this order.
vid_utdd_uttq_htt = [files_A, files_B, files_C]

print(vid_utdd_uttq_htt)


[['210518CS101', '230114CS2', '220324CS2', '210504CS205', '220702CS2'], ['220727CS201', '231103CS101', '231109CS101', '220318CS202', '231124CS101', '220922CS201'], []]


### video_CS:
- UTDD ['210504CS205', '220702CS2', '220324CS2', '210518CS101', '230114CS2'],
- UTTQ ['231103CS101', '220318CS202', '231124CS101', '231109CS101', '220922CS201', '220727CS201']
- HTT []

In [None]:
# Copy and paste video name
# Copy and paste video name
name = '231103CS101'

# (category folder, YOLO weights) pairs, in the same order as the lists in
# vid_utdd_uttq_htt: UTDD, UTTQ, HTT. The folders are the video_CS ones that
# the listing cell of this section scanned (they previously pointed at
# /content/video_test by mistake).
video_sources = [
    ("/content/video_CS/UTDD/", "/content/daday.pt"),
    ("/content/video_CS/UTTQ/", "/content/thucquan.pt"),
    ("/content/video_CS/HTT/", "/content/htt.pt"),
]

# Pick the folder/model of the category whose listing contains `name`.
for known_names, (video_dir, weights) in zip(vid_utdd_uttq_htt, video_sources):
    if name in known_names:
        test_vid = video_dir + name + ".mp4"
        model_weights = weights
        break
else:
    # Fail here instead of silently pairing an unknown name with the HTT model.
    raise ValueError(f"Video '{name}' was not found in any /content/video_CS category folder")

# "<category>_<video name>", e.g. "UTTQ_231103CS101"; used to name the output video.
folder_name, file_name = test_vid.split("/")[-2:]
input_video_name = folder_name.split(".")[0] + '_' + file_name.split(".")[0]


# Display names per class index for each weights file. daday.pt has two
# inflammation classes that are shown under the same name.
model_classes_dict = {
    "/content/daday.pt": ['Viem da day', 'Viem da day' , 'Ung thu da day'],
    "/content/thucquan.pt": ['Viem thuc quan', 'Ung thu thuc quan'],
    "/content/htt.pt": ['Loet HTT']
}

# Look up the display names; weights missing from the dictionary fall back to
# ['polyp', 'esophagael cancer'].
model_classes = model_classes_dict.get(model_weights, ['polyp', 'esophagael cancer'])


print("Input Video Name:", input_video_name)
print("Model Classes:", model_classes)

Input Video Name: UTTQ_231103CS101
Model Classes: ['Viem thuc quan', 'Viem da day', 'Ung thu thuc quan', 'Ung thu da day', 'Loet HTT']


# 1.3 File define (data_pk)




In [None]:
def get_files(directory):
    """Return the base names (extension stripped) of the files in ``directory``.

    Only regular files are listed. Sub-folders (for example a hidden
    ``.ipynb_checkpoints`` directory) are skipped so they can never be
    mistaken for a video name. The result is sorted, because ``os.listdir``
    returns entries in arbitrary order and the listing is printed for the
    user to copy names from.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        list[str]: File names without their last extension, sorted.

    Raises:
        OSError: If ``directory`` does not exist or cannot be read.
    """
    files = []
    for filename in sorted(os.listdir(directory)):
        if not os.path.isfile(os.path.join(directory, filename)):
            continue  # ignore sub-directories
        # Keep the file name without its extension
        file_name_without_extension, _ = os.path.splitext(filename)
        files.append(file_name_without_extension)
    return files

# data_pk is a single flat folder, so only one directory is listed here
# (there are no per-category UTDD / HTT sub-folders as in the other sets).
directory_B = "/content/data_pk/"
files_B = get_files(directory_B)

# Kept as a list of lists so the selection cell can index it with [0].
vid_utdd_uttq_htt = [files_B]

print(vid_utdd_uttq_htt)


[['Da day 220111 CS1 05', 'Da day 200508 CS1 02', 'IGH AINN20 Tổng hợp timeframe video gui CNTT', 'Da day 200530 CS1 02', 'Da day 200512 CS1 01', 'Da day 200926 CS1 01']]


In [None]:
# Available data_pk videos with the author's tags
# (presumably the lesion locations present in each video: DD, TQ, HTT — TODO confirm):
# 'Da day 200926 CS1 01' DD TQ
# 'Da day 200508 CS1 02' DD HTT TQ
# 'Da day 200530 CS1 02' DD TQ HTT
# 'Da day 200512 CS1 01' DD
# 'Da day 220111 CS1 05' DD TQ HTT

name = 'Da day 200512 CS1 01'

# Fail early on a mistyped name; otherwise test_vid / model_weights would be
# left undefined (or stale from a previous cell).
if name not in vid_utdd_uttq_htt[0]:
    raise ValueError(f"Video '{name}' was not found in /content/data_pk/")

test_vid = "/content/data_pk/" + name + ".mp4"
# The path must be a single-line string literal (it was previously split
# across two lines, which is a syntax error).
model_weights = "/content/5-class-model.pt"

# data_pk has no category folder, so the plain video name identifies the output.
input_video_name = name


# Display names per class index of the 5-class model.
model_classes_dict = {
    "/content/5-class-model.pt": ['Viem thuc quan', 'Viem da day' ,'Ung thu thuc quan', 'Ung thu da day', 'Loet HTT']
}

# Look up the display names; weights missing from the dictionary fall back to
# ['polyp', 'esophagael cancer'].
model_classes = model_classes_dict.get(model_weights, ['polyp', 'esophagael cancer'])


print("Input Video Name:", input_video_name)
print("Model Classes:", model_classes)

Input Video Name: Da day 200512 CS1 01
Model Classes: ['Viem da day', 'Viem da day', 'Ung thu da day']


# Class Color, Strongsort

In [None]:
class Colors:
    """Fixed palette of ``num_colors`` BGR colours, indexed by class id.

    The hues are spread evenly over OpenCV's 0-180 hue range with saturation
    and value set to 255, then converted from HSV to BGR.
    """

    def __init__(self, num_colors=80):
        """Build the palette once; ``num_colors`` is the number of entries."""
        self.num_colors = num_colors
        self.color_palette = self.generate_color_palette()

    def generate_color_palette(self):
        """Return a ``(num_colors, 3)`` uint8 array of BGR colours."""
        hues = np.linspace(0, 180, self.num_colors, endpoint=False)
        # Start from S = V = 255 everywhere, then overwrite the hue channel.
        hsv = np.full((self.num_colors, 1, 3), 255, dtype=np.uint8)
        hsv[:, 0, 0] = hues
        bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        return bgr.reshape(-1, 3)

    def __call__(self, class_id):
        """Return the colour for ``class_id`` as a tuple of three Python ints."""
        return tuple(int(channel) for channel in self.color_palette[class_id])

In [None]:
import numpy as np

from boxmot.appearance.reid_auto_backend import ReidAutoBackend
from boxmot.motion.cmc import get_cmc_method
from boxmot.trackers.strongsort.sort.detection import Detection
from boxmot.trackers.strongsort.sort.tracker import Tracker
from boxmot.utils.matching import NearestNeighborDistanceMetric
from boxmot.utils.ops import xyxy2tlwh
from boxmot.utils import PerClassDecorator


class StrongSORT(object):
    """StrongSORT tracker assembled from boxmot building blocks.

    Combines a Re-ID appearance backend (``ReidAutoBackend``), the StrongSORT
    ``Tracker`` with a cosine ``NearestNeighborDistanceMetric``, and ECC
    camera-motion compensation.

    Args:
        model_weights: Path of the Re-ID weights given to ``ReidAutoBackend``.
        device: Device given to ``ReidAutoBackend``.
        fp16: Passed to ``ReidAutoBackend`` as ``half``.
        per_class: Stored on the instance (presumably read by
            ``PerClassDecorator`` — verify against the installed boxmot).
        max_dist: Matching threshold of the cosine distance metric.
        max_iou_dist: ``max_iou_dist`` passed to ``Tracker``.
        max_age: ``max_age`` passed to ``Tracker``.
        n_init: ``n_init`` passed to ``Tracker``.
        nn_budget: Budget passed to the distance metric.
        mc_lambda: ``mc_lambda`` passed to ``Tracker``.
        ema_alpha: ``ema_alpha`` passed to ``Tracker``.
    """

    def __init__(
        self,
        model_weights,
        device,
        fp16,
        per_class=False,
        max_dist=0.2,
        max_iou_dist=0.7,
        max_age=30,
        n_init=1,
        nn_budget=100,
        mc_lambda=0.995,
        ema_alpha=0.9,
    ):
        # Record the values that are actually handed to the tracker below.
        # (These attributes used to be hard-coded and, because of trailing
        # commas, were stored as 1-tuples such as ``(0.95,)``.)
        self.max_dist = max_dist
        self.max_iou_dist = max_iou_dist
        self.max_age = max_age
        self.per_class = per_class
        rab = ReidAutoBackend(
            weights=model_weights, device=device, half=fp16
        )
        self.model = rab.get_backend()
        self.tracker = Tracker(
            metric=NearestNeighborDistanceMetric("cosine", max_dist, nn_budget),
            max_iou_dist=max_iou_dist,
            max_age=max_age,
            n_init=n_init,
            mc_lambda=mc_lambda,
            ema_alpha=ema_alpha,
        )
        self.cmc = get_cmc_method('ecc')()

    @PerClassDecorator
    def update(self, dets, img, embs=None):
        """Advance the tracker by one frame.

        Args:
            dets: ``np.ndarray`` of shape (N, 6); the columns are read as
                x1, y1, x2, y2, confidence, class.
            img: Current frame as ``np.ndarray``.
            embs: Optional precomputed appearance features, one per detection;
                when ``None`` they are extracted from ``img`` with the Re-ID model.

        Returns:
            np.ndarray: One row per confirmed track whose ``time_since_update``
            is below 1, laid out as [x1, y1, x2, y2, id, conf, cls, det_ind];
            an empty array when there is no such track.

        Raises:
            AssertionError: If ``dets`` or ``img`` is not an ndarray, or
                ``dets`` is not two-dimensional with 6 columns.
        """
        assert isinstance(
            dets, np.ndarray
        ), f"Unsupported 'dets' input format '{type(dets)}', valid format is np.ndarray"
        assert isinstance(
            img, np.ndarray
        ), f"Unsupported 'img' input format '{type(img)}', valid format is np.ndarray"
        assert (
            len(dets.shape) == 2
        ), "Unsupported 'dets' dimensions, valid number of dimensions is two"
        assert (
            dets.shape[1] == 6
        ), "Unsupported 'dets' 2nd dimension lenght, valid lenghts is 6"

        # Append each detection's row index as a 7th column so a track can
        # report which input detection it was matched to.
        dets = np.hstack([dets, np.arange(len(dets)).reshape(-1, 1)])
        xyxy = dets[:, 0:4]
        confs = dets[:, 4]
        clss = dets[:, 5]
        det_ind = dets[:, 6]

        # Camera-motion compensation is only applied once tracks exist.
        if len(self.tracker.tracks) >= 1:
            warp_matrix = self.cmc.apply(img, xyxy)
            for track in self.tracker.tracks:
                track.camera_update(warp_matrix)

        # extract appearance information for each detection
        if embs is not None:
            features = embs
        else:
            features = self.model.get_features(xyxy, img)

        tlwh = xyxy2tlwh(xyxy)
        detections = [
            Detection(box, conf, cls, ind, feat)
            for box, conf, cls, ind, feat in zip(tlwh, confs, clss, det_ind, features)
        ]

        # update tracker
        self.tracker.predict()
        self.tracker.update(detections)

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update >= 1:
                continue

            x1, y1, x2, y2 = track.to_tlbr()
            outputs.append(
                np.concatenate(
                    ([x1, y1, x2, y2], [track.id], [track.conf], [track.cls], [track.det_ind])
                ).reshape(1, -1)
            )
        if len(outputs) > 0:
            return np.concatenate(outputs)
        return np.array([])

# 2. Class ObjectDetection

## Simple use (recommend)

In [None]:
class ObjectDetection:
    """Run YOLO detection plus StrongSORT tracking over one video and log the tracks.

    Depends on notebook-level names defined in earlier cells:
    ``model_classes`` (display name per class index), ``input_video_name``
    (used in the output video file name), ``Colors`` and ``StrongSORT``.

    Files written to the current working directory:
      * ``strongsort_<input_video_name>.mp4`` - the annotated video
      * ``tracking_result.txt`` - one CSV row per drawn track per frame
      * ``seqinfo.ini`` - sequence metadata
    """

    def __init__(self, model_weights="yolov8s.pt", capture_index=0, min_temporal_threshold=0, max_temporal_threshold=0, iou_threshold=0.2, use_frame_id=False):
        """Load the detector, open the video and create the tracker.

        Args:
            model_weights: Path of the YOLO weights.
            capture_index: Source handed to ``cv2.VideoCapture`` (video path or camera index).
            min_temporal_threshold: Stored on the instance; not read by this class.
            max_temporal_threshold: Stored on the instance; not read by this class.
            iou_threshold: Minimum IoU for ``update_track_id`` to reuse a previous ID.
            use_frame_id: Selects which box coordinate ``update_track_id`` compares.
        """
        self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        print("Using Device: ", self.device)
        self.model = self.load_model(model_weights)
        # Display names come from the notebook-level ``model_classes`` list,
        # not from the names stored in the weights file.
        self.classes = model_classes
        self.colors = Colors(len(self.classes))
        self.font = cv2.FONT_HERSHEY_SIMPLEX
        self.capture_index = capture_index
        self.cap = self.load_capture()
        reid_weights = Path("/content/osnet_x0_25_endocv_30.pt")
        self.tracker = StrongSORT(reid_weights,
                                  torch.device(self.device),
                                  fp16=False,
                                  max_dist=0.95,
                                  max_iou_dist=0.95,
                                  max_age=300
                                  )
        self.min_temporal_threshold = min_temporal_threshold
        self.max_temporal_threshold = max_temporal_threshold
        self.iou_threshold = iou_threshold
        self.use_frame_id = use_frame_id

    def load_model(self, weights):
        """Create the YOLO model from ``weights``, call ``fuse()`` on it and return it."""
        model = YOLO(weights)
        model.fuse()
        return model

    def predict(self, frame):
        """Run the detector on ``frame`` (confidence threshold 0.6) and return its streamed results."""
        results = self.model(frame, stream=True, verbose=False, conf=0.6, line_width=1)
        return results

    def _frame_idx_to_hmsf(self, frame_id: int):
        """Convert a frame index to an ``HH:MM:SS.ffffff`` timestamp using the capture's FPS."""
        self.video_fps = self.cap.get(cv2.CAP_PROP_FPS)
        assert self.video_fps
        base = datetime.strptime('00:00:00.000000', '%H:%M:%S.%f')
        delta = timedelta(seconds=frame_id/self.video_fps)
        return (base + delta).strftime('%H:%M:%S.%f')

    def _frame_idx_to_hms(self, frame_id: int):
        """Convert a frame index to an ``HH:MM:SS`` timestamp (whole seconds, floored)."""
        self.video_fps = self.cap.get(cv2.CAP_PROP_FPS)
        assert self.video_fps
        base = datetime.strptime('00:00:00', '%H:%M:%S')
        delta = timedelta(seconds=frame_id//self.video_fps)
        return (base + delta).strftime('%H:%M:%S')

    def draw_tracks(self, frame, tracks, txt_file, overlap_threshold=0.5):
        """Draw every track on ``frame`` and append one CSV row per track to ``txt_file``.

        Args:
            frame: Image to draw on (modified in place).
            tracks: Iterable of rows laid out as [x1, y1, x2, y2, id, conf, class, ...].
            txt_file: Writable text file receiving the CSV rows.
            overlap_threshold: Unused; kept so existing callers keep working.

        Returns:
            The same ``frame`` object.
        """
        frame_rate = self.cap.get(cv2.CAP_PROP_FPS)
        # POS_FRAMES is read after the frame was grabbed, hence the -1.
        frame_id = int(self.cap.get(cv2.CAP_PROP_POS_FRAMES)) - 1
        timestamp_hms = self._frame_idx_to_hms(frame_id)
        timestamp_hmsf = self._frame_idx_to_hmsf(frame_id)
        null_notes = "Tracking"
        for track in tracks:
            x1, y1, x2, y2 = int(track[0]), int(track[1]), int(track[2]), int(track[3])
            track_id = int(track[4])
            class_id = int(track[6])
            class_name = self.classes[class_id]
            cv2.rectangle(frame, (x1,y1), (x2, y2), self.colors(class_id), 5)
            label = f'{class_name}, ID: {track_id}'  # text shown on the box
            (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 1.5, 5)
            # Filled background just inside the top-left corner of the box, then the text.
            cv2.rectangle(frame, (x1, y1+h+15), (x1+w, y1), self.colors(class_id), -1)
            cv2.putText(frame, label, (x1,y1+h+10), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255,255,255) , 3)
            # Write the result row to the txt file
            center_x = (x1 + x2) / 2
            center_y = (y1 + y2) / 2
            scale_height = frame.shape[0]
            scale_width = frame.shape[1]
            txt_file.write(f"{timestamp_hms},{timestamp_hmsf},{frame_id},{frame_rate},{class_name},{track_id},{track_id},{null_notes},{frame.shape[0]},{frame.shape[1]},{scale_height},{scale_width},{x1},{y1},{x2},{y2},{center_x},{center_y}\n")

        return frame

    def load_capture(self):
        """Open the video source and create the matching output writer.

        Stores the ``cv2.VideoWriter`` in ``self.writer``; it uses the source's
        FPS and frame size and writes ``strongsort_<input_video_name>.mp4``.

        Returns:
            The opened ``cv2.VideoCapture``.

        Raises:
            AssertionError: If the source cannot be opened.
        """
        cap = cv2.VideoCapture(self.capture_index)
        assert cap.isOpened(), f"Cannot open video source: {self.capture_index}"
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Requested after the size was read, so the writer keeps the size read above.
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        video_name = "strongsort_" + input_video_name + ".mp4"
        self.writer = cv2.VideoWriter(video_name
        , cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
        return cap

    def write_seqinfo_ini(self, seq_name, seq_length, frame_rate, im_width, im_height, im_ext, im_dir):
        """Write the sequence metadata to ``seqinfo.ini`` in the working directory."""
        with open("seqinfo.ini", "w") as f:
            f.write("[Sequence]\n")
            f.write(f"name={seq_name}\n")
            f.write(f"imDir={im_dir}\n")  # replace with the image folder if needed
            f.write(f"frameRate={frame_rate}\n")
            f.write(f"seqLength={seq_length}\n")
            f.write(f"imWidth={im_width}\n")
            f.write(f"imHeight={im_height}\n")
            f.write(f"imExt={im_ext}\n")

    def calculate_iou(self, box1, box2):
        """
        Calculate intersection over union (IoU) between two bounding boxes.

        Widths and heights are computed with a "+ 1", i.e. box coordinates are
        treated as inclusive.

        Parameters:
        - box1 (list): [x1, y1, x2, y2] of the first box.
        - box2 (list): [x1, y1, x2, y2] of the second box.

        Returns:
        - iou (float): Intersection over Union (IoU) value; 0.0 when the union
          area is not positive (degenerate boxes).
        """
        # Calculate intersection area
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])
        intersection_area = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)

        # Calculate areas of each bounding box
        box1_area = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
        box2_area = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)

        # Calculate union area
        union_area = box1_area + box2_area - intersection_area
        if union_area <= 0:
            # Degenerate boxes: avoid dividing by zero.
            return 0.0

        # Calculate IoU
        return intersection_area / union_area

    def update_track_id(self, current_tracks, previous_tracks):
        """Reuse IDs from the previous frame for overlapping tracks of the same class.

        For every current track, previous tracks with the same class (index 6)
        and an IoU above ``self.iou_threshold`` are candidates; the candidate
        with the smallest difference in one box coordinate wins and its ID
        (index 4) is written into the current track in place. The compared
        coordinate is index 3 (y2) when ``self.use_frame_id`` is true, else
        index 1 (y1).

        Returns:
            list: The (possibly relabelled) current tracks, in input order.
        """
        updated_tracks = []
        for current_track in current_tracks:
            min_distance = float('inf')
            matching_track_id = None
            for previous_track in previous_tracks:
                if current_track[6] != previous_track[6]:
                    continue  # Skip tracks of different classes
                iou = self.calculate_iou(current_track[:4], previous_track[:4])
                if iou > self.iou_threshold:
                    coord = 3 if self.use_frame_id else 1
                    distance = abs(current_track[coord] - previous_track[coord])
                    if distance < min_distance:
                        min_distance = distance
                        matching_track_id = previous_track[4]

            if matching_track_id is not None:
                current_track[4] = matching_track_id
            updated_tracks.append(current_track)
        return updated_tracks

    def __call__(self):
        """Process the whole video: detect, track, draw, and write all outputs.

        The capture and the writer are released even if processing fails
        part-way, so the partially written video is still finalized.
        """
        tracker = self.tracker

        # Collect the metadata of the input video
        seq_name = "StrongSort"
        im_dir = "img1"
        seq_length = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_rate = self.cap.get(cv2.CAP_PROP_FPS)
        im_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        im_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        im_ext = ".jpg"  # image file extension

        # Write the metadata to seqinfo.ini
        self.write_seqinfo_ini(seq_name, seq_length, frame_rate, im_width, im_height, im_ext, im_dir)

        # Open the txt file that receives the tracking rows
        with open("tracking_result.txt", "w") as txt_file:
            txt_file.write("timestamp_hms,timestamp_hmsf,frame_idx,fps,object_cls,object_idx,object_id,notes,frame_height,frame_width,scale_height,scale_width,x1,y1,x2,y2,center_x,center_y\n")
            previous_tracks = []
            try:
                while True:
                    start_time = perf_counter()
                    ret, frame = self.cap.read()
                    if not ret:
                        break
                    # White background for the FPS overlay
                    cv2.rectangle(frame, (0, 30), (220, 80), (255, 255, 255), -1)
                    detections = self.predict(frame)
                    for dets in detections:
                        tracks = tracker.update(dets.boxes.data.to("cpu").numpy(), frame)
                        # The tracker returns an empty 1-D array when nothing is tracked.
                        if len(tracks.shape) == 2 and tracks.shape[1] == 8:
                            if len(previous_tracks) > 0:
                                tracks = self.update_track_id(tracks, previous_tracks)
                            frame = self.draw_tracks(frame, tracks, txt_file)
                            previous_tracks = tracks

                    # Guard against a zero elapsed time so the overlay never
                    # divides by zero.
                    elapsed = perf_counter() - start_time
                    fps = 1 / elapsed if elapsed > 0 else 0
                    cv2.putText(frame, f'FPS: {int(fps)}', (20, 70), self.font, 1.5, (0, 255, 0), 5)
                    self.writer.write(frame)
                    # cv2_imshow(frame)
                    if cv2.waitKey(5) & 0xFF == 27:  # Esc stops processing
                        break
            finally:
                self.cap.release()
                self.writer.release()
                cv2.destroyAllWindows()



## Labels on the left / Nhãn bên trái (Optional)


In [None]:
class ObjectDetection:
    def __init__(self, model_weights="yolov8s.pt", capture_index=0, min_temporal_threshold=0, max_temporal_threshold=0, iou_threshold=0.2, use_frame_id=False):
        """Load the detector, open the video, create the tracker and reset the label state.

        Reads the notebook-level ``model_classes`` list for the display names.
        """
        if torch.cuda.is_available():
            self.device = 'cuda:0'
        else:
            self.device = 'cpu'
        print("Using Device: ", self.device)

        # Detector. The names stored in the weights are read first and then
        # replaced by the notebook-level display names.
        self.model = self.load_model(model_weights)
        self.classes = self.model.names
        self.classes = model_classes

        # Drawing helpers
        self.colors = Colors(len(self.classes))
        self.font = cv2.FONT_HERSHEY_SIMPLEX

        # Video input; load_capture() reads self.capture_index, so it must be set first.
        self.capture_index = capture_index
        self.cap = self.load_capture()

        # Tracker with the Re-ID weights copied to /content
        reid_weights = Path("/content/osnet_x0_25_endocv_30.pt")
        tracker_options = dict(fp16=False, max_dist=0.95, max_iou_dist=0.95, max_age=300)
        self.tracker = StrongSORT(reid_weights, torch.device(self.device), **tracker_options)

        # Matching parameters
        self.min_temporal_threshold = min_temporal_threshold
        self.max_temporal_threshold = max_temporal_threshold
        self.iou_threshold = iou_threshold
        self.use_frame_id = use_frame_id

        # Per-run state: label per track ID, saved crops, last detected frame
        self.labels = {}
        self.saved_images = {}
        self.last_detected_frame = None

    def load_model(self, weights):
        model = YOLO(weights)
        model.fuse()
        return model

    def predict(self, frame):
        results = self.model(frame, stream=True, verbose=False, conf=0.6, line_width=1)
        return results

    def _frame_idx_to_hmsf(self, frame_id: int):
        """convert to hmsf timestamp by given frame idx and fps"""
        self.video_fps = self.cap.get(cv2.CAP_PROP_FPS)
        assert self.video_fps
        base = datetime.strptime('00:00:00.000000', '%H:%M:%S.%f')
        delta = timedelta(seconds=frame_id/self.video_fps)
        return (base + delta).strftime('%H:%M:%S.%f')

    def _frame_idx_to_hms(self, frame_id: int):
        """convert to hms timestamp by given frame idx and fps"""
        self.video_fps = self.cap.get(cv2.CAP_PROP_FPS)
        assert self.video_fps
        base = datetime.strptime('00:00:00', '%H:%M:%S')
        delta = timedelta(seconds=frame_id//self.video_fps)
        return (base + delta).strftime('%H:%M:%S')

    def load_capture(self):
        cap = cv2.VideoCapture(self.capture_index)
        assert cap.isOpened()
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        video_name = "strongsort_" + input_video_name + ".mp4"
        self.writer = cv2.VideoWriter(video_name
        , cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
        return cap

    def write_seqinfo_ini(self, seq_name, seq_length, frame_rate, im_width, im_height, im_ext, im_dir):
        with open("seqinfo.ini", "w") as f:
            f.write("[Sequence]\n")
            f.write(f"name={seq_name}\n")
            f.write(f"imDir={im_dir}\n")  # Thay thế bằng thư mục chứa ảnh nếu cần
            f.write(f"frameRate={frame_rate}\n")
            f.write(f"seqLength={seq_length}\n")
            f.write(f"imWidth={im_width}\n")
            f.write(f"imHeight={im_height}\n")
            f.write(f"imExt={im_ext}\n")

    def calculate_iou(self, box1, box2):
        """
        Intersection over union (IoU) of two boxes given as [x1, y1, x2, y2].

        Widths and heights are computed with a "+ 1", i.e. the coordinates are
        treated as inclusive.

        Parameters:
        - box1 (list): [x1, y1, x2, y2] of the first box.
        - box2 (list): [x1, y1, x2, y2] of the second box.

        Returns:
        - iou (float): intersection area divided by union area.
        """
        # Overlap extent along each axis (0 when the boxes do not overlap)
        overlap_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]) + 1)
        overlap_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]) + 1)
        shared = overlap_w * overlap_h

        # Individual box areas
        area_first = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
        area_second = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)

        # Union = sum of the areas minus the part counted twice
        return shared / (area_first + area_second - shared)

    def update_track_id(self, current_tracks, previous_tracks):
        updated_tracks = []
        for current_track in current_tracks:
            min_distance = float('inf')
            matching_track_id = None
            for previous_track in previous_tracks:
                if current_track[6] != previous_track[6]:
                    continue  # Skip tracks of different classes
                iou = self.calculate_iou(current_track[:4], previous_track[:4])
                #print(iou, self.iou_threshold)
                if iou > self.iou_threshold:
                    if self.use_frame_id:
                        time_diff = abs(current_track[3] - previous_track[3])
                        if time_diff < min_distance:
                            min_distance = time_diff
                            matching_track_id = previous_track[4]
                    else:
                        time_diff = abs(current_track[1] - previous_track[1])
                        if time_diff < min_distance:
                            min_distance = time_diff
                            matching_track_id = previous_track[4]

            if matching_track_id is not None:
                current_track[4] = matching_track_id
            updated_tracks.append(current_track)
        return updated_tracks

    def draw_tracks(self, frame, tracks, txt_file, overlap_threshold=0.5):
        """Draw each track's bounding box on ``frame`` and log one result row per track.

        Args:
            frame: Image the boxes are drawn on (modified in place).
            tracks: Iterable of track rows indexed as
                ``[x1, y1, x2, y2, track_id, conf, class_id, ...]``.
            txt_file: Open, writable text file; receives one comma-separated row per track.
            overlap_threshold: Not used by this method; kept so existing calls stay valid.

        Returns:
            The same ``frame`` object, with the boxes drawn on it.
        """
        frame_rate = self.cap.get(cv2.CAP_PROP_FPS)
        # Assumes the frame being drawn was already read from the capture, so the
        # capture position is one past it (hence the -1).
        frame_id = int(self.cap.get(cv2.CAP_PROP_POS_FRAMES)) - 1
        timestamp_hms = self._frame_idx_to_hms(frame_id)
        timestamp_hmsf = self._frame_idx_to_hmsf(frame_id)
        null_notes = "Tracking"
        # The frame_* and scale_* columns are both filled from the frame's own shape.
        scale_height = frame.shape[0]
        scale_width = frame.shape[1]

        for track in tracks:
            x1, y1, x2, y2 = int(track[0]), int(track[1]), int(track[2]), int(track[3])
            track_id = int(track[4])
            class_id = int(track[6])
            class_name = self.classes[class_id]
            cv2.rectangle(frame, (x1, y1), (x2, y2), self.colors(class_id), 5)
            self.save_first_detected_frame(frame, track)

            # Remember the class name the first time this track ID is seen.
            if track_id not in self.labels:
                self.labels[track_id] = class_name

            # Append the result row; the track ID fills both object_idx and object_id.
            center_x = (x1 + x2) / 2
            center_y = (y1 + y2) / 2
            txt_file.write(f"{timestamp_hms},{timestamp_hmsf},{frame_id},{frame_rate},{class_name},{track_id},{track_id},{null_notes},{scale_height},{scale_width},{scale_height},{scale_width},{x1},{y1},{x2},{y2},{center_x},{center_y}\n")
        return frame

    def display_labels(self, frame, tracks):
        """Draw the label of every known track ID in the top-left corner of ``frame``.

        IDs present in ``tracks`` get their stored class name refreshed and are drawn in
        green (0, 255, 0); IDs stored in ``self.labels`` but absent from ``tracks`` are
        drawn in (0, 0, 255). Every label is drawn at the same position, so later entries
        of ``self.labels`` are painted over earlier ones.

        Args:
            frame: Image to draw on (modified in place).
            tracks: Iterable of track rows; index 4 is the track ID, index 6 the class ID.

        Returns:
            The same ``frame`` object.
        """
        # Class name of every track present in this frame, keyed by track ID.
        current_names = {int(track[4]): self.classes[int(track[6])] for track in tracks}

        for track_id in list(self.labels):
            if track_id in current_names:
                # Refresh the stored name first so the text shows the current class.
                self.labels[track_id] = current_names[track_id]
                label_color = (0, 255, 0)
            else:
                # Not detected in this frame.
                label_color = (0, 0, 255)
            label = f'{self.labels[track_id]}, ID: {track_id}'

            # Black background box sized to the text, then the text itself.
            (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 1.5, 5)
            label_x = 0
            label_y = 50 + h
            cv2.rectangle(frame, (label_x, label_y - h - 15), (label_x + w + 10, label_y + 10), (0, 0, 0), -1)
            cv2.putText(frame, label, (label_x + 5, label_y - 5), cv2.FONT_HERSHEY_SIMPLEX, 1.5, label_color, 3)

        return frame
    def save_first_detected_frame(self, frame, track):
        """Store a 300-pixel-wide thumbnail of a track the first time its (id, class) is seen.

        The crop is taken from ``frame`` at the track's box. Boxes that are empty once
        clamped to the frame are skipped (nothing is stored), so a later frame with a
        usable box can still provide the thumbnail.

        Args:
            frame: Image to crop from.
            track: Track row indexed as ``[x1, y1, x2, y2, track_id, conf, class_id, ...]``.
        """
        key = (int(track[4]), int(track[6]))
        if key in self.saved_images:
            return

        # Clamp the box to the frame: a negative coordinate would otherwise be read as
        # "count from the end" by the slice and yield a wrong or empty crop.
        frame_h, frame_w = frame.shape[:2]
        x1 = min(max(int(track[0]), 0), frame_w)
        y1 = min(max(int(track[1]), 0), frame_h)
        x2 = min(max(int(track[2]), 0), frame_w)
        y2 = min(max(int(track[3]), 0), frame_h)
        if x2 <= x1 or y2 <= y1:
            # Empty crop: the aspect ratio is undefined and a resize would fail.
            return

        object_img = frame[y1:y2, x1:x2]
        height, width = object_img.shape[:2]
        aspect_ratio = width / height
        new_width = 300
        # Very wide crops would round down to a zero-pixel height; keep at least one row.
        new_height = max(1, int(new_width / aspect_ratio))
        self.saved_images[key] = cv2.resize(object_img, (new_width, new_height))

    def draw_saved_images(self, frame):
        """Paste every stored thumbnail onto ``frame`` at a fixed offset from the top-left.

        All thumbnails share the same position (x=20, y=100), so later entries of
        ``self.saved_images`` cover earlier ones. A thumbnail that does not fit is clipped
        to the frame instead of raising a shape-mismatch error.

        Args:
            frame: Image to draw on (modified in place).

        Returns:
            The same ``frame`` object.
        """
        x_offset = 20
        y_offset = 100
        frame_h, frame_w = frame.shape[:2]
        for img in self.saved_images.values():
            # Visible part of the thumbnail once clipped to the frame.
            paste_h = min(img.shape[0], frame_h - y_offset)
            paste_w = min(img.shape[1], frame_w - x_offset)
            if paste_h <= 0 or paste_w <= 0:
                continue
            frame[y_offset:y_offset + paste_h, x_offset:x_offset + paste_w] = img[:paste_h, :paste_w]
        return frame


    def __call__(self):
        """Run detection and tracking over the whole capture and write all outputs.

        Writes ``seqinfo.ini`` and ``tracking_result.txt`` in the working directory and
        sends every annotated frame to ``self.writer``. The capture and the writer are
        released even when processing stops on an exception, so the frames written so far
        are not left in an unfinalised video file.
        """
        tracker = self.tracker

        # Sequence metadata taken from the input capture.
        seq_name = "StrongSort"
        im_dir = "img1"
        seq_length = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_rate = self.cap.get(cv2.CAP_PROP_FPS)
        im_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        im_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        im_ext = ".jpg"  # image extension recorded in seqinfo.ini

        self.write_seqinfo_ini(seq_name, seq_length, frame_rate, im_width, im_height, im_ext, im_dir)

        try:
            with open("tracking_result.txt", "w") as txt_file:
                txt_file.write("timestamp_hms,timestamp_hmsf,frame_idx,fps,object_cls,object_idx,object_id,notes,frame_height,frame_width,scale_height,scale_width,x1,y1,x2,y2,center_x,center_y\n")
                previous_tracks = []
                while True:
                    ret, frame = self.cap.read()
                    if not ret:
                        break

                    # Placeholder label; display_labels paints over it when a track is known.
                    label = "Unknown"
                    (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 1.5, 5)
                    label_x = 0
                    label_y = 50 + h
                    cv2.rectangle(frame, (label_x, label_y - h - 15), (label_x + w + 10, label_y + 10), (0, 0, 0), -1)
                    cv2.putText(frame, label, (label_x + 5, label_y - 5), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 255, 255), 3)

                    # Defined up front so display_labels still gets a value if predict yields nothing.
                    tracks = []
                    for dets in self.predict(frame):
                        tracks = tracker.update(dets.boxes.data.to("cpu").numpy(), frame)
                        # Only 2-D outputs with 8 columns are treated as usable tracks.
                        if len(tracks.shape) == 2 and tracks.shape[1] == 8:
                            if len(previous_tracks) > 0:
                                tracks = self.update_track_id(tracks, previous_tracks)
                            frame = self.draw_tracks(frame, tracks, txt_file)
                            previous_tracks = tracks
                    self.display_labels(frame, tracks)
                    self.draw_saved_images(frame)
                    self.writer.write(frame)
                    # cv2_imshow(frame)
                    if cv2.waitKey(5) & 0xFF == 27:  # 27 == Esc
                        break
        finally:
            self.cap.release()
            self.writer.release()
            cv2.destroyAllWindows()

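## Label on the right — Nhãn bên phải (Optional)

Running the cell below redefines `ObjectDetection`, replacing the version defined above: the label and the thumbnail are drawn on the right side of the frame instead of the left.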

In [None]:
class ObjectDetection:
    """YOLOv8 detection + StrongSORT tracking over one video, annotated on the right side.

    Calling the instance processes the whole capture: each frame is run through the
    detector and the tracker, boxes are drawn, the label and a thumbnail of the tracked
    object are placed on the right of the frame, and one row per track is appended to
    ``tracking_result.txt``.

    Relies on two names expected to be defined by earlier notebook cells:
    ``model_classes`` (class-id -> name mapping) and ``input_video_name``.
    """

    # Thumbnail panel layout in pixels. On a 1920x1080 frame these give the panel
    # x=1600, bottom=1080 and a maximum thumbnail height of 980.
    THUMB_WIDTH = 300
    THUMB_MARGIN_RIGHT = 20
    THUMB_TOP = 100

    def __init__(self, model_weights="yolov8s.pt", capture_index=0, min_temporal_threshold=0, max_temporal_threshold=0, iou_threshold=0.2, use_frame_id=False):
        """Load the detector, open the capture/writer and build the tracker.

        Args:
            model_weights: Path of the YOLO weights to load.
            capture_index: Source passed to ``cv2.VideoCapture``.
            min_temporal_threshold: Stored on the instance; not read by this class.
            max_temporal_threshold: Stored on the instance; not read by this class.
            iou_threshold: Minimum IoU for ``update_track_id`` to reuse a previous ID.
            use_frame_id: Selects which box coordinate ``update_track_id`` compares.
        """
        self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        print("Using Device: ", self.device)
        self.model = self.load_model(model_weights)
        # Class names come from the notebook-level `model_classes`, not from the weights.
        self.classes = model_classes
        self.colors = Colors(len(self.classes))
        self.font = cv2.FONT_HERSHEY_SIMPLEX
        self.capture_index = capture_index
        self.cap = self.load_capture()
        reid_weights = Path("/content/osnet_x0_25_endocv_30.pt")
        self.tracker = StrongSORT(reid_weights,
                                  torch.device(self.device),
                                  fp16 = False,
                                  max_dist=0.95,
                                  max_iou_dist=0.95,
                                  max_age=300
                                  )
        self.min_temporal_threshold = min_temporal_threshold
        self.max_temporal_threshold = max_temporal_threshold
        self.iou_threshold = iou_threshold
        self.use_frame_id = use_frame_id
        self.labels = {}        # track_id -> class name shown for the current frame
        self.saved_images = {}  # (track_id, class_id) -> thumbnail image
        self.last_detected_frame = None

    def load_model(self, weights):
        """Load the YOLO model from ``weights`` and fuse its layers."""
        model = YOLO(weights)
        model.fuse()
        return model

    def predict(self, frame):
        """Run the detector on ``frame`` (streamed results, confidence threshold 0.5)."""
        results = self.model(frame, stream=True, verbose=False, conf=0.5, line_width=1)
        return results

    def _frame_idx_to_hmsf(self, frame_id: int):
        """Convert a frame index to an ``HH:MM:SS.ffffff`` timestamp using the capture FPS."""
        self.video_fps = self.cap.get(cv2.CAP_PROP_FPS)
        assert self.video_fps
        base = datetime.strptime('00:00:00.000000', '%H:%M:%S.%f')
        delta = timedelta(seconds=frame_id/self.video_fps)
        return (base + delta).strftime('%H:%M:%S.%f')

    def _frame_idx_to_hms(self, frame_id: int):
        """Convert a frame index to an ``HH:MM:SS`` timestamp (whole seconds) using the capture FPS."""
        self.video_fps = self.cap.get(cv2.CAP_PROP_FPS)
        assert self.video_fps
        base = datetime.strptime('00:00:00', '%H:%M:%S')
        delta = timedelta(seconds=frame_id//self.video_fps)
        return (base + delta).strftime('%H:%M:%S')

    def load_capture(self):
        """Open the capture and create ``self.writer`` for the annotated output video.

        Returns:
            The opened ``cv2.VideoCapture``.
        """
        cap = cv2.VideoCapture(self.capture_index)
        assert cap.isOpened()
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # NOTE(review): the writer uses the size read *before* these two requests; if a
        # source honours them, frames may no longer match the writer size. Confirm.
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        video_name = "tracking_" + input_video_name + ".mp4"
        self.writer = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
        return cap

    def write_seqinfo_ini(self, seq_name, seq_length, frame_rate, im_width, im_height, im_ext, im_dir):
        """Write the sequence metadata to ``seqinfo.ini`` in the working directory."""
        with open("seqinfo.ini", "w") as f:
            f.write("[Sequence]\n")
            f.write(f"name={seq_name}\n")
            f.write(f"imDir={im_dir}\n")  # replace with the image folder if needed
            f.write(f"frameRate={frame_rate}\n")
            f.write(f"seqLength={seq_length}\n")
            f.write(f"imWidth={im_width}\n")
            f.write(f"imHeight={im_height}\n")
            f.write(f"imExt={im_ext}\n")

    def calculate_iou(self, box1, box2):
        """Return the intersection-over-union of two ``[x1, y1, x2, y2, ...]`` boxes.

        Widths and heights are computed as ``x2 - x1 + 1`` / ``y2 - y1 + 1``.
        Returns 0.0 when the union is not positive instead of dividing by it.
        """
        # Intersection rectangle (empty intersections clamp to 0).
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])
        intersection_area = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)

        box1_area = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
        box2_area = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)

        union_area = box1_area + box2_area - intersection_area
        if union_area <= 0:
            return 0.0

        return intersection_area / union_area

    def update_track_id(self, current_tracks, previous_tracks):
        """Carry over IDs from ``previous_tracks`` onto overlapping ``current_tracks``.

        For each current track, previous tracks of the same class (index 6) whose IoU with
        it exceeds ``self.iou_threshold`` are candidates; the candidate with the smallest
        absolute difference in one box coordinate wins and its ID (index 4) is written
        into the current track in place.

        NOTE(review): despite the flag name, ``use_frame_id`` only switches the compared
        coordinate between index 3 (True) and index 1 (False) — confirm the intent.

        Returns:
            A list with the (possibly relabelled) rows of ``current_tracks``.
        """
        compared_index = 3 if self.use_frame_id else 1
        updated_tracks = []
        for current_track in current_tracks:
            min_distance = float('inf')
            matching_track_id = None
            for previous_track in previous_tracks:
                if current_track[6] != previous_track[6]:
                    continue  # never match across classes
                iou = self.calculate_iou(current_track[:4], previous_track[:4])
                if iou > self.iou_threshold:
                    distance = abs(current_track[compared_index] - previous_track[compared_index])
                    if distance < min_distance:
                        min_distance = distance
                        matching_track_id = previous_track[4]

            if matching_track_id is not None:
                current_track[4] = matching_track_id
            updated_tracks.append(current_track)
        return updated_tracks

    def draw_tracks(self, frame, tracks, txt_file, overlap_threshold=0.5):
        """Draw each track's bounding box on ``frame`` and log one result row per track.

        Args:
            frame: Image the boxes are drawn on (modified in place).
            tracks: Iterable of track rows indexed as
                ``[x1, y1, x2, y2, track_id, conf, class_id, ...]``.
            txt_file: Open, writable text file; receives one comma-separated row per track.
            overlap_threshold: Not used by this method; kept so existing calls stay valid.

        Returns:
            The same ``frame`` object, with the boxes drawn on it.
        """
        frame_rate = self.cap.get(cv2.CAP_PROP_FPS)
        # __call__ reads the frame before drawing, so the capture position is one past it.
        frame_id = int(self.cap.get(cv2.CAP_PROP_POS_FRAMES)) - 1
        timestamp_hms = self._frame_idx_to_hms(frame_id)
        timestamp_hmsf = self._frame_idx_to_hmsf(frame_id)
        null_notes = "Tracking"
        # The frame_* and scale_* columns are both filled from the frame's own shape.
        scale_height = frame.shape[0]
        scale_width = frame.shape[1]

        for track in tracks:
            x1, y1, x2, y2 = int(track[0]), int(track[1]), int(track[2]), int(track[3])
            track_id = int(track[4])
            class_id = int(track[6])
            class_name = self.classes[class_id]
            cv2.rectangle(frame, (x1, y1), (x2, y2), self.colors(class_id), 5)

            center_x = (x1 + x2) / 2
            center_y = (y1 + y2) / 2
            # Remember the class name the first time this track ID is seen.
            if track_id not in self.labels:
                self.labels[track_id] = class_name
            self.save_first_detected_frame(frame, track)
            txt_file.write(f"{timestamp_hms},{timestamp_hmsf},{frame_id},{frame_rate},{class_name},{track_id},{track_id},{null_notes},{scale_height},{scale_width},{scale_height},{scale_width},{x1},{y1},{x2},{y2},{center_x},{center_y}\n")

        return frame

    def display_labels(self, frame, tracks):
        """Draw the labels stored in ``self.labels`` at the top-right of ``frame``, then reset them.

        A label whose ID is present in ``tracks`` is drawn as ``"<class>, ID: <id>"`` in
        that track's own class colour; any other stored label is drawn as stored, in white.
        ``self.labels`` is emptied afterwards (``draw_tracks`` refills it for the next frame).

        Args:
            frame: Image to draw on (modified in place).
            tracks: Iterable of track rows; index 4 is the track ID, index 6 the class ID.

        Returns:
            The same ``frame`` object.
        """
        # track_id -> (class_id, class_name) for every track present in this frame.
        current = {}
        for track in tracks:
            class_id = int(track[6])
            current[int(track[4])] = (class_id, self.classes[class_id])

        for track_id, label in list(self.labels.items()):
            if track_id in current:
                # Use this track's own class for the colour, not the last track's.
                class_id, class_name = current[track_id]
                label_color = self.colors(class_id)
                label = f'{class_name}, ID: {track_id}'
            else:
                label_color = (255, 255, 255)

            # Right-aligned black background box, then the text itself.
            (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 1.5, 5)
            label_x = frame.shape[1] - w - 20
            label_y = 50 + h
            cv2.rectangle(frame, (label_x, label_y - h - 15), (label_x + w + 10, label_y + 10), (0, 0, 0), -1)
            cv2.putText(frame, label, (label_x + 5, label_y - 5), cv2.FONT_HERSHEY_SIMPLEX, 1.5, label_color, 3)

        self.labels = {}
        return frame

    def save_first_detected_frame(self, frame, track):
        """Store a thumbnail of ``track`` unless one is already stored for its (id, class).

        When the (id, class) differs from the last one stored, all stored thumbnails are
        dropped first. The box is clamped to the frame, and boxes that are empty after
        clamping are skipped: resizing an empty crop is what makes ``cv2.resize`` fail
        with ``!ssize.empty()``.

        Args:
            frame: Image to crop from.
            track: Track row indexed as ``[x1, y1, x2, y2, track_id, conf, class_id, ...]``.
        """
        frame_h, frame_w = frame.shape[:2]
        # Clamp to the frame; a negative coordinate would otherwise be read as
        # "count from the end" by the slice.
        x1 = min(max(int(track[0]), 0), frame_w)
        y1 = min(max(int(track[1]), 0), frame_h)
        x2 = min(max(int(track[2]), 0), frame_w)
        y2 = min(max(int(track[3]), 0), frame_h)
        key = (int(track[4]), int(track[6]))

        if hasattr(self, 'last_saved_key') and self.last_saved_key != key:
            # A different object is tracked now: drop the previous thumbnail(s).
            self.saved_images.clear()

        if key in self.saved_images:
            return

        # Tallest thumbnail that still fits below THUMB_TOP.
        max_height = frame_h - self.THUMB_TOP
        if x2 <= x1 or y2 <= y1 or max_height < 1:
            return

        object_img = frame[y1:y2, x1:x2]
        height, width = object_img.shape[:2]
        aspect_ratio = width / height
        new_width = self.THUMB_WIDTH
        # max(1, ...) keeps extreme aspect ratios from producing a zero-sized target.
        new_height = max(1, int(new_width / aspect_ratio))
        if new_height > max_height:
            new_height = max_height
            new_width = max(1, int(new_height * aspect_ratio))

        self.saved_images[key] = cv2.resize(object_img, (new_width, new_height))
        self.last_saved_key = key

    def draw_saved_images(self, frame):
        """Paste the stored thumbnail(s) into a blacked-out panel on the right of ``frame``.

        The panel position is derived from the frame size, and the paste is clipped to the
        frame so a thumbnail that does not fit cannot raise a shape-mismatch error.

        Args:
            frame: Image to draw on (modified in place).

        Returns:
            The same ``frame`` object.
        """
        frame_h, frame_w = frame.shape[:2]
        x_offset = max(0, frame_w - self.THUMB_WIDTH - self.THUMB_MARGIN_RIGHT)
        y_offset = self.THUMB_TOP
        for img in self.saved_images.values():
            x_end = x_offset + img.shape[1]
            # Black out the column under the thumbnail down to the bottom of the frame.
            cv2.rectangle(frame, (x_offset, y_offset), (x_end, frame_h), (0, 0, 0), -1)
            paste_h = min(img.shape[0], frame_h - y_offset)
            paste_w = min(img.shape[1], frame_w - x_offset)
            if paste_h <= 0 or paste_w <= 0:
                continue
            frame[y_offset:y_offset + paste_h, x_offset:x_offset + paste_w] = img[:paste_h, :paste_w]
        return frame

    def __call__(self):
        """Run detection and tracking over the whole capture and write all outputs.

        Writes ``seqinfo.ini`` and ``tracking_result.txt`` in the working directory and
        sends every annotated frame to ``self.writer``. The capture and the writer are
        released even when processing stops on an exception.
        """
        tracker = self.tracker

        # Sequence metadata taken from the input capture.
        seq_name = "StrongSort"
        im_dir = "img1"
        seq_length = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_rate = self.cap.get(cv2.CAP_PROP_FPS)
        im_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        im_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        im_ext = ".jpg"  # image extension recorded in seqinfo.ini

        self.write_seqinfo_ini(seq_name, seq_length, frame_rate, im_width, im_height, im_ext, im_dir)

        try:
            with open("tracking_result.txt", "w") as txt_file:
                txt_file.write("timestamp_hms,timestamp_hmsf,frame_idx,fps,object_cls,object_idx,object_id,notes,frame_height,frame_width,scale_height,scale_width,x1,y1,x2,y2,center_x,center_y\n")
                previous_tracks = []
                while True:
                    ret, frame = self.cap.read()
                    if not ret:
                        break

                    # Placeholder label; display_labels paints over it when a track is known.
                    label = "Unknown"
                    (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 1.5, 5)
                    label_x = frame.shape[1] - w - 20
                    label_y = 50 + h
                    cv2.rectangle(frame, (label_x, label_y - h - 15), (label_x + w + 10, label_y + 10), (0, 0, 0), -1)
                    cv2.putText(frame, label, (label_x + 5, label_y - 5), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 255, 255), 3)

                    # Defined up front so display_labels still gets a value if predict yields nothing.
                    tracks = []
                    for dets in self.predict(frame):
                        tracks = tracker.update(dets.boxes.data.to("cpu").numpy(), frame)
                        # Only 2-D outputs with 8 columns are treated as usable tracks.
                        if len(tracks.shape) == 2 and tracks.shape[1] == 8:
                            if len(previous_tracks) > 0:
                                tracks = self.update_track_id(tracks, previous_tracks)
                            frame = self.draw_tracks(frame, tracks, txt_file)
                            previous_tracks = tracks
                    self.display_labels(frame, tracks)
                    self.draw_saved_images(frame)
                    self.writer.write(frame)
                    #cv2_imshow(frame)
                    if cv2.waitKey(5) & 0xFF == 27:  # 27 == Esc
                        break
        finally:
            self.cap.release()
            self.writer.release()
            cv2.destroyAllWindows()

# 3. RUN

In [None]:
# Build the detector/tracker for the selected weights and video, then process every frame.
# `model_weights`, `test_vid` and `input_video_name` are expected to come from earlier cells.
detector = ObjectDetection(model_weights, test_vid)
detector()
# Name of the annotated video file (same expression as the one used for the video writer).
video_name = "tracking_" + input_video_name + ".mp4"
print(video_name)

Using Device:  cuda:0
Model summary (fused): 218 layers, 25841497 parameters, 0 gradients, 78.7 GFLOPs


[32m2024-06-21 09:12:24.919[0m | [1mINFO    [0m | [36mboxmot.utils.torch_utils[0m:[36mselect_device[0m:[36m52[0m - [1mYolo Tracking v10.0.72 🚀 Python-3.10.12 torch-2.2.2+cu121
CUDA:0 (Tesla T4, 15102MiB)[0m
[32m2024-06-21 09:12:25.291[0m | [32m[1mSUCCESS [0m | [36mboxmot.appearance.reid_model_factory[0m:[36mload_pretrained_weights[0m:[36m207[0m - [32m[1mSuccessfully loaded pretrained weights from "/content/osnet_x0_25_endocv_30.pt"[0m


error: OpenCV(4.8.0) /io/opencv/modules/imgproc/src/resize.cpp:4062: error: (-215:Assertion failed) !ssize.empty() in function 'resize'


# 4. Generate txt csv results

In [None]:
def txt_to_csv(input_txt_file, output_csv_file):
    """Copy a comma-separated text file to ``output_csv_file`` through the csv module.

    Every row read from ``input_txt_file`` (including its first line) is written out
    unchanged in content, using the csv writer's default dialect.
    """
    with open(input_txt_file, 'r') as src, open(output_csv_file, 'w', newline='') as dst:
        rows = csv.reader(src, delimiter=',')
        csv.writer(dst).writerows(rows)


def convert_file(input_file, output_file, default_conf=1.0):
    """Convert a ``tracking_result.txt``-style file into MOT-style result rows.

    Each output row is ``frame_id,object_id,x1,y1,width,height,conf,-1,-1,-1``.

    The input carries no detection confidence: per its header, field 6 is ``object_id``.
    The confidence column is therefore filled with ``default_conf`` rather than with a
    value parsed from the track ID.

    Args:
        input_file: Path of the comma-separated tracking file; its first line is a header.
        output_file: Path of the MOT-style file to write.
        default_conf: Value written in the confidence column of every row.
    """
    with open(input_file, 'r') as infile, open(output_file, 'w') as outfile:
        next(infile, None)  # skip the header line
        for line in infile:
            parts = line.strip().split(',')
            if len(parts) < 17:
                continue  # skip lines that do not have enough values

            frame_id = parts[2]
            object_id = parts[5]
            x1 = int(parts[12])
            y1 = int(parts[13])
            x2 = int(parts[14])
            y2 = int(parts[15])

            width = x2 - x1
            height = y2 - y1

            outfile.write(f"{frame_id},{object_id},{x1},{y1},{width},{height},{default_conf},-1,-1,-1\n")

# Usage: convert the tracker's raw result file into a MOT-style file and a CSV copy.
# `input_video_name` is expected to be defined by an earlier cell.
input_file = 'tracking_result.txt'
output_mot_file = 'mot_result.txt'
output_csv_file = "tracking_" + input_video_name +'.csv'
convert_file(input_file, output_mot_file)
txt_to_csv(input_file, output_csv_file)

In [None]:
# Create a folder named after the output video inside the run folder.
run_folder = "/content/runs/htt"
video_name = "tracking_" + input_video_name + ".mp4"
video_folder = os.path.join(run_folder, video_name)
# exist_ok avoids the separate existence check and makes re-creating the folder harmless.
os.makedirs(video_folder, exist_ok=True)

# Move the video, seqinfo.ini and the result files into that folder.
for artifact in (video_name, "seqinfo.ini", "mot_result.txt", "tracking_result.txt", output_csv_file):
    os.rename(artifact, os.path.join(video_folder, artifact))
#os.rename('detect_'+input_video_name+'.mp4', os.path.join(video_folder,'detect_'+input_video_name+'.mp4'))
print(video_name)

In [None]:
from google.colab import drive
import shutil
import os
from google.colab import files



# Folder to archive and the archive file to produce.
folder_path = '/content/runs'
output_zip_path = '/content/run.zip'

# make_archive is given the path without its extension and the 'zip' format.
archive_base = output_zip_path.replace('.zip', '')
shutil.make_archive(archive_base, 'zip', folder_path)

# Report whether the archive is present at the expected path.
zip_created = os.path.exists(output_zip_path)
if not zip_created:
    print('Error in creating zip file')
else:
    print(f'Zip file created successfully: {output_zip_path}')

# Optional: download the archive through the browser.
#files.download(output_zip_path)


Zip file created successfully: /content/run2006.zip


In [None]:
# Copy the archive to Google Drive (Drive is mounted at /content/drive earlier in the notebook).
!cp -r /content/run.zip /content/drive/MyDrive