## Group No

## Group Member Names:
1. SHIVAM SAHIL - 2023AA05663
2. JAHNAVI GALI - 2023AA05684
3. PRASHANT KUMAR - 2023AA05043
4. SAHIL MEHRA - 2023AA05327

In [None]:
# Necessary Imports
import os
import cv2
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

import torchvision
from torchvision.transforms import Compose, ToPILImage, ToTensor, RandomHorizontalFlip, ColorJitter
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from tqdm import tqdm

from filterpy.kalman import KalmanFilter
from scipy.optimize import linear_sum_assignment
from IPython.display import clear_output
import matplotlib.pyplot as plt
from torchmetrics.detection.mean_ap import MeanAveragePrecision


import ssl

ssl._create_default_https_context = ssl._create_stdlib_context

In [69]:
# ---- A. Custom Dataset for MOT17 ----
class MOT17DetectionDataset(Dataset):
    """
    Reads images and bounding boxes from the MOT17 'det.txt' file.
    Treats the detection bounding boxes as training labels (not usual practice, but for demonstration).
    """
    def __init__(self, seq_dir, transform=None, is_test_dir = False):
        """
        seq_dir: path to a sequence directory, e.g. 'MOT17-02-DPM'.
                 This directory should contain:
                   - img1/ (folder with images)
                   - det/det.txt (file with detection bboxes)
        transform: augmentations and preprocessing (transforms.Compose)
        """
        super().__init__()
        self.seq_dir = seq_dir
        self.img_dir = os.path.join(seq_dir, 'img1')
        self.is_test_dir = is_test_dir
        if self.is_test_dir: self.det_file = os.path.join(seq_dir, 'det', 'det.txt')
        else: self.det_file = os.path.join(seq_dir, 'gt', 'gt.txt')
        
        self.transform = transform
        
        # Parse det.txt into a dict { frame_id : [ list of bboxes ] }
        # Each bbox is [x1, y1, x2, y2, confidence]
        self.frame_to_boxes = {}
        with open(self.det_file, 'r') as f:
            for line in f:
                # Format: frame_id, track_id, x, y, w, h, conf, ...
                # Some files have more columns, we focus on the first 7
                vals = line.strip().split(',')
                frame_id = int(vals[0])
                x = float(vals[2])
                y = float(vals[3])
                w = float(vals[4])
                h = float(vals[5])
                confidence = float(vals[6])

                # Convert to x1,y1,x2,y2
                x1, y1 = x, y
                x2, y2 = x + w, y + h

                if frame_id not in self.frame_to_boxes:
                    self.frame_to_boxes[frame_id] = []
                self.frame_to_boxes[frame_id].append([x1, y1, x2, y2, confidence])
        
        # Gather all possible frames from img1 folder
        # Typically, images are named 000001.jpg, 000002.jpg, etc.
        self.img_files = sorted([
            os.path.join(self.img_dir, f) 
            for f in os.listdir(self.img_dir) 
            if f.endswith('.jpg')
        ])
        
    def __len__(self):
        return len(self.img_files)
    
    def get_item_training(self, idx):
        img_path = self.img_files[idx]
        # Frame numbering in MOT17 typically starts at 1, so let's parse from filename
        # e.g., 000069.jpg -> frame_id = 69
        file_name = os.path.basename(img_path)
        frame_id = int(file_name.split('.')[0])  # "000069" => 69

        # Read the image
        img_bgr = cv2.imread(img_path)
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        
        # Gather bounding boxes for this frame
        boxes_info = self.frame_to_boxes.get(frame_id, [])
        
        # For demonstration, let's label everything as class 1 = "object"
        # (PyTorch detection models expect labels >= 1)
        boxes = []
        labels = []
        for (x1, y1, x2, y2, conf) in boxes_info:
            boxes.append([x1, y1, x2, y2])
            labels.append(1)  # single-class approach
        
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        
        # Create target dictionary
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = torch.tensor([idx])  # or frame_id
        
        if self.transform:
            img_pil = ToPILImage()(img_rgb)
            img_pil = self.transform(img_pil)
            image = img_pil
        else:
            # Convert numpy -> torch
            image = ToTensor()(img_rgb)
        
        return image, target
    
    def get_item_test(self, idx):
        """
        Returns:
            image (Tensor): the frame
            detections (Tensor): bounding boxes [N, 5], each row is (x, y, w, h, conf)
            frame_id (int)
        """
        img_path = self.img_files[idx]
        file_name = os.path.basename(img_path)
        frame_id = int(file_name.split('.')[0])  # e.g. '000001.jpg' -> 1
        
        img_bgr = cv2.imread(img_path)
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        # Convert to PyTorch tensor
        if self.transform:
            image = self.transform(img_rgb)
        else:
            image = ToTensor()(img_rgb)
        
        # Get detections for this frame
        dets = self.frame_to_boxes.get(frame_id, [])
        dets_tensor = torch.as_tensor(dets, dtype=torch.float32)  # shape [N, 5]
        
        return image, dets_tensor, frame_id
        
    
    def __getitem__(self, idx):
        if self.is_test_dir:return self.get_item_test(idx = idx)
        else:return self.get_item_training(idx = idx)


In [57]:
# ---- B. Defining Transformations ----
def get_train_transform():
    return Compose([
        # We'll do only these two to avoid complicated box coordinate transforms
        RandomHorizontalFlip(p=0.5),
        ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        ToTensor()  # Convert PIL image to torch tensor
    ])

def get_test_transform():
    return Compose([
        ToTensor()
    ])
    
def get_faster_rcnn_model(num_classes):
    """
    Loads a pre-trained Faster R-CNN with ResNet50-FPN backbone
    and modifies the box predictor for 'num_classes' classes.
    """
    # 1. Load a pre-trained model
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    
    # 2. Get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    
    # 3. Replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    
    return model

In [58]:
seq_path = r'mot17/MOT17/train/MOT17-02-FRCNN'
train_dataset = MOT17DetectionDataset(seq_dir=seq_path, transform=get_train_transform())
def my_collate_fn(batch):
    return tuple(zip(*batch))
train_loader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=0,
    collate_fn=my_collate_fn
)

## Model Training

In [61]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

num_classes = 2  # 1 class + background (assuming all objects share one label)
model = get_faster_rcnn_model(num_classes).to(device)

optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 10

def train_one_epoch(model, optimizer, data_loader, device):
    model.train()
    total_loss = 0.0
    pbar = tqdm(data_loader, desc="Training", unit="batch")
    
    for images, targets in pbar:
        # Move images and targets to the GPU (if available)
        images = [img.to(device) for img in images]
        new_targets = []
        for t in targets:
            new_t = {
                "boxes": t["boxes"].to(device),
                "labels": t["labels"].to(device),
                "image_id": t["image_id"].to(device)
            }
            new_targets.append(new_t)
        
        # Forward pass
        loss_dict = model(images, new_targets)
        losses = sum(loss for loss in loss_dict.values())
        
        # Backpropagation
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        
        total_loss += losses.item()
        
        # Update progress bar with the latest loss value
        pbar.set_postfix(loss=f"{losses.item():.4f}")
    
    avg_loss = total_loss / len(data_loader)
    pbar.close()
    return avg_loss

for epoch in range(num_epochs):
    epoch_loss = train_one_epoch(model, optimizer, train_loader, device)
    print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {epoch_loss:.4f}")


Training: 100%|██████████| 300/300 [17:31<00:00,  3.51s/batch, loss=0.9637]


Epoch [1/10] - Loss: 1.1765


Training: 100%|██████████| 300/300 [17:32<00:00,  3.51s/batch, loss=0.8213]


Epoch [2/10] - Loss: 0.8566


Training: 100%|██████████| 300/300 [2:07:38<00:00, 25.53s/batch, loss=0.6320]   


Epoch [3/10] - Loss: 0.7228


Training: 100%|██████████| 300/300 [2:21:19<00:00, 28.26s/batch, loss=0.8592]    


Epoch [4/10] - Loss: 0.6319


Training: 100%|██████████| 300/300 [2:25:53<00:00, 29.18s/batch, loss=0.6048]   


Epoch [5/10] - Loss: 0.5633


Training: 100%|██████████| 300/300 [27:46<00:00,  5.55s/batch, loss=0.5225]   


Epoch [6/10] - Loss: 0.5116


Training: 100%|██████████| 300/300 [17:45<00:00,  3.55s/batch, loss=0.4809]


Epoch [7/10] - Loss: 0.4681


Training: 100%|██████████| 300/300 [17:45<00:00,  3.55s/batch, loss=0.4718]


Epoch [8/10] - Loss: 0.4417


Training: 100%|██████████| 300/300 [18:04<00:00,  3.62s/batch, loss=0.3696]


Epoch [9/10] - Loss: 0.4195


Training: 100%|██████████| 300/300 [18:11<00:00,  3.64s/batch, loss=0.3546]

Epoch [10/10] - Loss: 0.3917





In [None]:
# Since model took too long to train, we don't want to lose it later
def checkpoint_and_save_model(epoch, model, optimizer, checkpoint_path = "fasterrcnn_checkpoint.pth"):
    torch.save({
        'epoch': epoch,  # e.g. last epoch you finished
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': epoch_loss
        }, checkpoint_path)
    print(f"Checkpoint saved at {checkpoint_path}")
    
def reload_model_into_memory(checkpoint_path = "fasterrcnn_checkpoint.pth"):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = get_faster_rcnn_model(num_classes).to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
    resume_checkpoint = torch.load(checkpoint_path, map_location=device)
    # Restore model and optimizer
    model.load_state_dict(resume_checkpoint['model_state_dict'])
    optimizer.load_state_dict(resume_checkpoint['optimizer_state_dict'])

    # Start training from the next epoch
    start_epoch = resume_checkpoint['epoch'] + 1
    resume_loss = resume_checkpoint['loss']
    print(f"Resuming training from epoch {start_epoch} with previous loss: {resume_loss}")
    return model, optimizer, resume_checkpoint, start_epoch, resume_loss

def train_model(checkpoint_path = "fasterrcnn_checkpoint.pth", additional_epochs = 5):
    num_epochs_to_run = additional_epochs  # how many more epochs you want
    model, optimizer, _, start_epoch, _ = reload_model_into_memory(checkpoint_path=checkpoint_path)
    for epoch in range(start_epoch, start_epoch + num_epochs_to_run):
        epoch_loss = train_one_epoch(model, optimizer, train_loader, device)
        print(f"Epoch [{epoch+1}/{start_epoch + num_epochs_to_run}] - Loss: {epoch_loss:.4f}")
        # Optional: save checkpoint after each epoch
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': epoch_loss
        }, checkpoint_path)
        

# Saving long running model for re-usability purposes
checkpoint_and_save_model(epoch,model,optimizer)
    

Checkpoint saved at fasterrcnn_checkpoint.pth


In [73]:
def iou(boxA, boxB):
    """
    boxA, boxB: [x1, y1, x2, y2]
    Return IoU of these two boxes.
    """
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    interArea = max(0, xB - xA) * max(0, yB - yA)
    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    return interArea / float(boxAArea + boxBArea - interArea + 1e-6)

class KalmanBoxTracker:
    """
    This class represents the internal state of an object tracked as a bounding box.
    State vector: [cx, cy, s, r, vx, vy, vs] where
      - (cx, cy): center of the box
      - s: scale (area)
      - r: aspect ratio
      - (vx, vy, vs): velocities
    """
    count = 0

    def __init__(self, bbox):
        """
        Initialize with bounding box [x1, y1, x2, y2].
        """
        self.kf = KalmanFilter(dim_x=7, dim_z=4)
        
        # Define state transition matrix F and measurement matrix H
        self.kf.F = np.array([[1, 0, 0, 0, 1, 0, 0],
                              [0, 1, 0, 0, 0, 1, 0],
                              [0, 0, 1, 0, 0, 0, 1],
                              [0, 0, 0, 1, 0, 0, 0],
                              [0, 0, 0, 0, 1, 0, 0],
                              [0, 0, 0, 0, 0, 1, 0],
                              [0, 0, 0, 0, 0, 0, 1]])
        self.kf.H = np.array([[1, 0, 0, 0, 0, 0, 0],
                              [0, 1, 0, 0, 0, 0, 0],
                              [0, 0, 1, 0, 0, 0, 0],
                              [0, 0, 0, 1, 0, 0, 0]])
        
        self.kf.P[4:, 4:] *= 1000.  # high uncertainty for the unobserved velocities
        self.kf.P *= 10.

        # Initialize state: convert bbox from [x1,y1,x2,y2] to [cx, cy, s, r]
        cx, cy, s, r = self._convert_bbox_to_z(bbox)
        self.kf.x[:4] = np.array([[cx], [cy], [s], [r]])
        
        self.time_since_update = 0
        self.id = KalmanBoxTracker.count
        KalmanBoxTracker.count += 1
        self.history = []
        self.hits = 1
        self.hit_streak = 1

    def update(self, bbox):
        """
        Update state with observed bbox [x1, y1, x2, y2].
        """
        self.time_since_update = 0
        self.history = []
        self.hits += 1
        self.hit_streak += 1
        z = np.array(self._convert_bbox_to_z(bbox)).reshape((4, 1))
        self.kf.update(z)

    def predict(self):
        """
        Advance the state vector and return the predicted bounding box estimate.
        """
        self.kf.predict()
        self.time_since_update += 1
        pred_bbox = self._convert_x_to_bbox(self.kf.x)
        self.history.append(pred_bbox)
        return pred_bbox

    def get_state(self):
        """
        Return the current bounding box estimate.
        """
        return self._convert_x_to_bbox(self.kf.x)

    @staticmethod
    def _convert_bbox_to_z(bbox):
        """
        Convert [x1, y1, x2, y2] to [cx, cy, s, r]:
          - s: area
          - r: aspect ratio (width/height)
        """
        x1, y1, x2, y2 = bbox
        w = x2 - x1
        h = y2 - y1
        cx = x1 + w / 2.
        cy = y1 + h / 2.
        s = w * h
        r = w / (h + 1e-6)
        return [cx, cy, s, r]

    @staticmethod
    def _convert_x_to_bbox(x, score=None):
        """
        Convert state vector (first 4 elements) [cx, cy, s, r] to [x1, y1, x2, y2].
        Clamps s and r to avoid invalid values.
        """
        # Extract first 4 state values
        state = x.flatten()[:4]
        cx, cy, s, r = state
        
        # Ensure s and r are positive to avoid invalid sqrt operations
        s = max(s, 1e-6)
        r = max(r, 1e-6)
        
        # Compute width and height
        w = np.sqrt(s * r)
        h = np.sqrt(s / (r + 1e-6))
        
        x1 = cx - w / 2.
        y1 = cy - h / 2.
        x2 = cx + w / 2.
        y2 = cy + h / 2.
        
        if score is None:
            return np.array([x1, y1, x2, y2])
        else:
            return np.array([x1, y1, x2, y2, score])
        """
        [cx, cy, s, r] -> [x1, y1, x2, y2]
        """
        w = np.sqrt(x[2]*x[3])
        h = np.sqrt(x[2]/(x[3]+1e-6))
        x1 = x[0] - w/2.
        y1 = x[1] - h/2.
        x2 = x[0] + w/2.
        y2 = x[1] + h/2.
        if score is None:
            return np.array([x1, y1, x2, y2]).reshape((4,))
        else:
            return np.array([x1, y1, x2, y2, score]).reshape((5,))


In [74]:
class Sort:
    def __init__(self, max_age=5, min_hits=3, iou_threshold=0.3):
        """
        max_age: frames to keep track alive without updates
        min_hits: min detections before track is confirmed
        iou_threshold: IoU matching threshold
        """
        self.max_age = max_age
        self.min_hits = min_hits
        self.iou_threshold = iou_threshold
        self.trackers = []

    def update(self, dets=np.empty((0,5))):
        """
        Params:
          dets: numpy array of detections in [x1, y1, x2, y2, score]
        Returns:
          an array of the final tracked objects: [x1,y1,x2,y2,track_id]
        """
        # 1. Predict new locations for existing trackers.
        for t in range(len(self.trackers)):
            self.trackers[t].predict()

        # 2. Build cost matrix for matching
        trks = []
        for t in range(len(self.trackers)):
            pos = self.trackers[t].get_state()
            trks.append(pos)
        trks = np.array(trks)  # shape: [N, 4]

        # If no trackers or no detections, handle edge cases:
        if len(trks) == 0 or len(dets) == 0:
            matches = []
            unmatched_dets = range(len(dets))
            unmatched_trks = range(len(trks))
        else:
            iou_matrix = np.zeros((len(trks), len(dets)), dtype=np.float32)
            for t in range(len(trks)):
                for d in range(len(dets)):
                    iou_matrix[t,d] = iou(trks[t], dets[d,:4])
            
            row_ind, col_ind = linear_sum_assignment(-iou_matrix)
            matches = []
            unmatched_dets = list(range(len(dets)))
            unmatched_trks = list(range(len(trks)))
            
            for r, c in zip(row_ind, col_ind):
                if iou_matrix[r,c] < self.iou_threshold:
                    continue
                matches.append([r, c])
                unmatched_dets.remove(c)
                unmatched_trks.remove(r)

        # 3. Update matched trackers with assigned detections
        for r, c in matches:
            bbox = dets[c,:4]
            self.trackers[r].update(bbox)

        # 4. Create new trackers for unmatched detections
        for i in unmatched_dets:
            bbox = dets[i,:4]
            new_tracker = KalmanBoxTracker(bbox)
            self.trackers.append(new_tracker)

        # 5. Get rid of dead tracks
        ret = []
        for t in reversed(range(len(self.trackers))):
            trk = self.trackers[t]
            d = trk.get_state()
            # If the tracker hasn't been updated, remove if too old
            if trk.time_since_update > self.max_age:
                self.trackers.pop(t)
                continue
            # If confirmed (hits > min_hits) or recently updated, store
            if (trk.hit_streak >= self.min_hits) or (trk.time_since_update < 1):
                ret.append(np.concatenate((d, [trk.id])).reshape(1, -1))
        if len(ret) > 0:
            return np.concatenate(ret)
        return np.empty((0,5))

In [75]:
# Suppose your test sequence is "MOT17-01-FRCNN"
test_seq_path = r'mot17/MOT17/test/MOT17-01-FRCNN'  # adjust accordingly

test_dataset = MOT17DetectionDataset(seq_dir=test_seq_path, is_test_dir = True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Switch model to eval mode
model.eval()

# Initialize our SORT tracker
sort_tracker = Sort(max_age=5, min_hits=2, iou_threshold=0.3)

all_results = []  # to store final bounding boxes with IDs

with torch.no_grad():
    for images, dets_tensor, frame_id in test_loader:
        # images shape: [1, 3, H, W]
        images = [img.cuda() if torch.cuda.is_available() else img for img in images]
        # 1. Inference with Faster R-CNN (optional if you want to combine both approaches)
        outputs = model(images)  # list of dict
        # The model outputs 'boxes' (N,4), 'labels', 'scores'
        pred_boxes = outputs[0]['boxes'].cpu().numpy()  # shape [N,4]
        pred_scores = outputs[0]['scores'].cpu().numpy()  # shape [N]
        # 2. Or if you want to rely on the test set's detections:
        # dets_tensor is shape [K,5] -> [x, y, w, h, conf]
        # convert [x, y, w, h] => [x1, y1, x2, y2]
        dets_np = dets_tensor[0].numpy()
        boxes_xyxy = []
        for i in range(dets_np.shape[0]):
            x1 = dets_np[i,0]
            y1 = dets_np[i,1]
            w = dets_np[i,2]
            h = dets_np[i,3]
            conf = dets_np[i,4]
            x2 = x1 + w
            y2 = y1 + h
            boxes_xyxy.append([x1, y1, x2, y2, conf])
        boxes_xyxy = np.array(boxes_xyxy)  # shape: [K,5]

        # (Pick whichever detection set you want to track:
        #   your model's outputs or the .txt detections. 
        #   For demonstration, let's track the .txt detections directly.)
        try:
            tracked_objects = sort_tracker.update(boxes_xyxy)
        except Exception as e:
            print(f'Error for boxes-  {boxes_xyxy}')
            raise e
        
        # tracked_objects is [N, 5] => [x1, y1, x2, y2, track_id]
        if tracked_objects.shape[0] > 0:
            for obj in tracked_objects:
                x1, y1, x2, y2, track_id = obj
                # Store or visualize
                all_results.append({
                    "frame_id": int(frame_id.item()),
                    "track_id": int(track_id),
                    "bbox": [float(x1), float(y1), float(x2), float(y2)]
                })

# all_results will have the final bounding boxes with consistent track IDs across frames
print("Tracking complete. Results:", len(all_results))


Tracking complete. Results: 6175


In [93]:
def compute_iou(boxA, boxB):
    """
    Computes the Intersection over Union (IoU) of two bounding boxes.
    Each box is [x1, y1, x2, y2].
    """
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    
    interArea = max(0, xB - xA) * max(0, yB - yA)
    areaA = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    areaB = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    unionArea = areaA + areaB - interArea
    
    return interArea / unionArea if unionArea > 0 else 0
def evaluate_detection_accuracy(model, dataloader, device, iou_threshold=0.5):
    model.eval()
    total_accuracy = 0.0
    count = 0
    with torch.no_grad():
        for batch in dataloader:
            # Unpack the batch. If three items are returned (image, target, frame_id), ignore the frame id.
            if len(batch) == 3:
                images, targets, _ = batch
            else:
                images, targets = batch

            images = [img.to(device) for img in images]
            outputs = model(images)
            
            # Loop over each image in the batch
            for output, target in zip(outputs, targets):
                pred_boxes = output['boxes'].cpu().numpy()  # shape [N, 4]

                # Convert target to bounding box format if it's a tensor.
                # Expected format in test dataset: [x, y, w, h, conf]
                if isinstance(target, torch.Tensor):
                    gt_tensor = target.cpu().numpy()  # shape [K,5]
                    gt_boxes = []
                    for row in gt_tensor:
                        x, y, w, h, _ = row  # ignore confidence
                        gt_boxes.append([x, y, x + w, y + h])
                    gt_boxes = np.array(gt_boxes)
                else:
                    # If target is already a dictionary with key 'boxes'
                    gt_boxes = target['boxes'].cpu().numpy()
                
                if len(gt_boxes) == 0:
                    continue  # skip if no ground truth boxes
                
                true_positives = 0
                # For each ground truth box, check if any predicted box has sufficient IoU
                for gt in gt_boxes:
                    for pred in pred_boxes:
                        if compute_iou(gt, pred) >= iou_threshold:
                            true_positives += 1
                            break  # found a match, move to next ground truth box
                frame_accuracy = true_positives / len(gt_boxes)
                total_accuracy += frame_accuracy
                count += 1

    return total_accuracy / count if count > 0 else 0

# Now evaluate and print the accuracy:
model.eval()
accuracy = evaluate_detection_accuracy(model, test_loader, device, iou_threshold=0.5)
print(f"Model Detection Accuracy: {accuracy*100:.2f}%")


Model Detection Accuracy: 0.00%


In [77]:
for item in all_results:
    # open the corresponding frame
    f_id = item["frame_id"]
    track_id = item["track_id"]
    (x1, y1, x2, y2) = item["bbox"]

    img_path = os.path.join(test_seq_path, 'img1', f"{f_id:06d}.jpg")
    frame_bgr = cv2.imread(img_path)

    # draw rectangle
    cv2.rectangle(frame_bgr, (int(x1), int(y1)), (int(x2), int(y2)), (0,255,0), 2)
    cv2.putText(frame_bgr, f"ID: {track_id}", (int(x1), int(y1)-5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1)
    
    # Save or show
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    clear_output(wait=True)
    plt.figure(figsize=(10, 6))
    plt.imshow(frame_rgb)
    plt.axis("off")
    plt.show()

KeyboardInterrupt: 

In [None]:
import cv2
import matplotlib.pyplot as plt

# Load your image
img = cv2.imread('mot17/MOT17/train/MOT17-02-FRCNN/img1/000001.jpg')

# Example: assuming first four numbers are x_min, y_min, x_max, y_max
x_min, y_min, x_max, y_max = 1, 1, 912, 484

# Draw the bounding box (red rectangle)
cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2)

# Optionally, add text for any additional info (e.g., class or confidence)
cv2.putText(img, "Detected", (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0,0,255), 2)

# Display the image
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
cv2.waitKey(0)
cv2.destroyAllWindows()
