# Implement a tiny version of YOLO with DIOR dataset

## Import libraries

In [1]:
import os
import numpy as np
from tqdm.auto import tqdm
from collections import Counter
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torch
import torch.nn as nn
from torch.utils.data import random_split
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from PIL import Image
import torch.optim as optim
from torchinfo import summary


  from .autonotebook import tqdm as notebook_tqdm


# Implement YOLO architecture

In [2]:
class SquareActivation(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, t):
        return torch.pow(t, 2)

In [3]:
class TinyissimoYOLO(nn.Module):
    def __init__(self, B=2, num_classes=1, S=4):
        super(TinyissimoYOLO, self).__init__()
        
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride = 2)
        )

        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride = 2)
        )

        self.fclayers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128*5*5, 256),
            nn.ReLU(),
            nn.Linear(256, S*S*(num_classes + 3 * B)),
        )


    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.fclayers(x)
        
        return x


model = TinyissimoYOLO()

In [4]:
summary(model, input_size=(1, 3, 88, 88))

Layer (type:depth-idx)                   Output Shape              Param #
TinyissimoYOLO                           [1, 112]                  --
├─Sequential: 1-1                        [1, 16, 44, 44]           --
│    └─Conv2d: 2-1                       [1, 16, 88, 88]           448
│    └─ReLU: 2-2                         [1, 16, 88, 88]           --
│    └─MaxPool2d: 2-3                    [1, 16, 44, 44]           --
├─Sequential: 1-2                        [1, 32, 22, 22]           --
│    └─Conv2d: 2-4                       [1, 32, 44, 44]           4,640
│    └─ReLU: 2-5                         [1, 32, 44, 44]           --
│    └─MaxPool2d: 2-6                    [1, 32, 22, 22]           --
├─Sequential: 1-3                        [1, 64, 11, 11]           --
│    └─Conv2d: 2-7                       [1, 64, 22, 22]           18,496
│    └─ReLU: 2-8                         [1, 64, 22, 22]           --
│    └─MaxPool2d: 2-9                    [1, 64, 11, 11]           --
├─Seque

## Utility Function

### Intersection over Union

In [41]:
def intersection_over_union(boxes_preds, boxes_labels):
    """
    Calculates intersection over union
    
    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
        boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)    
    Returns:
        tensor: Intersection over union for all examples
    """
    # boxes_preds shape is (N, 4) where N is the number of bboxes
    #boxes_labels shape is (n, 4)
    
    box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
    box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
    box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
    box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
    box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
    box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
    box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
    box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
    
    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)
    #print(f"x1: {x1}, y1: {y1}, x2: {x2}, y2: {y2}")
    
    #.clamp(0) is for the case when they don't intersect. Since when they don't intersect, one of these will be negative so that should become 0
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    #print(f"intersection: {intersection}")

    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
    #print(f"box1_area: {box1_area}, box2_area: {box2_area}")
    
    return intersection / (box1_area + box2_area - intersection + 1e-6)

### Non Max Suppression

**Input**: A list of Proposal boxes B, corresponding confidence scores S and overlap threshold N.

**Output**: A list of filtered proposals D.

Algorithm:

1.  Select the proposal with highest confidence score, remove it from B and add it to the final proposal list D. (Initially D is empty).
2.  Now compare this proposal with all the proposals — calculate the IOU (Intersection over Union) of this proposal with every other proposal. If the IOU is greater than the threshold N, remove that proposal from B.
3.  Again take the proposal with the highest confidence from the remaining proposals in B and remove it from B and add it to D.
4.  Once again calculate the IOU of this proposal with all the proposals in B and eliminate the boxes which have high IOU than threshold.
5.  This process is repeated until there are no more proposals left in B.


In [42]:
def non_max_suppression(bboxes, iou_threshold, threshold):
    """
    Does Non Max Suppression given bboxes
    Parameters:
        bboxes (list): list of lists containing all bboxes with each bboxes
        specified as [class_pred, prob_score, x_center, y_center, width, height]
        iou_threshold (float): threshold where predicted bboxes is correct
        threshold (float): threshold to remove predicted bboxes (independent of IoU) 
    Returns:
        list: bboxes after performing NMS given a specific IoU threshold
    """

    assert type(bboxes) == list

    bboxes = [box for box in bboxes if box[1] > threshold]
    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
    bboxes_after_nms = []

    while bboxes:
        chosen_box = bboxes.pop(0)

        bboxes = [
            box
            for box in bboxes
            if box[0] != chosen_box[0]
            or intersection_over_union(
                torch.tensor(chosen_box[2:]),
                torch.tensor(box[2:]),
            )
            < iou_threshold
        ]

        bboxes_after_nms.append(chosen_box)
    #print(f"bboxes_after_nms: {bboxes_after_nms}")

    return bboxes_after_nms

### Mean Average Precision

It describes a trade-off between precision and recall.

**Precision**, also referred to as the positive predictive value, describes how well a model predicts the positive class. 
$$Precision=\frac{TP}{TP+FP}$$
>   Of all bounding box **predictions**, what fraction was actually correct?

**Recall**, also called sensitivity tells you if your model made the right predictions when it should have. 
$$Recall=\frac{TP}{TP+FN}$$
>   Of all **target** bounding boxes, what fraction did we correctly detect?


In [43]:
def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, num_classes=1
):
    """
    Calculates mean average precision 
    Parameters:
        pred_boxes (list): list of lists containing all bboxes with each bboxes
        specified as [train_idx, class_prediction, prob_score, x_center, y_center, width, height]
        true_boxes (list): Similar as pred_boxes except all the correct ones 
        iou_threshold (float): threshold where predicted bboxes is correct
        num_classes (int): number of classes
    Returns:
        float: mAP value across all classes given a specific IoU threshold 
    """

    # list storing all AP for respective classes
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(num_classes):
        detections = []
        ground_truths = []

        # Go through all predictions and targets,
        # and only add the ones that belong to the
        # current class c
        for detection in pred_boxes:
            if detection[1] == c:
                detections.append(detection)
        #print(f"{c} class has {len(detections)} detections")

        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(true_box)
        #print(f"{c} class has {len(ground_truths)} ground truths")

        # find the amount of bboxes for each training example
        # Counter here finds how many ground truth bboxes we get
        # for each training example, so let's say img 0 has 3,
        # img 1 has 5 then we will obtain a dictionary with:
        # amount_bboxes = {0:3, 1:5}
        amount_bboxes = Counter([gt[0] for gt in ground_truths])
        #print(f"{c} class has {len(amount_bboxes)} amount bboxes")

        # We then go through each key, val in this dictionary
        # and convert to the following (w.r.t same example):
        # ammount_bboxes = {0:torch.tensor[0,0,0], 1:torch.tensor[0,0,0,0,0]}
        for key, val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)

        # sort by box probabilities which is index 2
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))
        total_true_bboxes = len(ground_truths)
        #print(f"{c} class has {total_true_bboxes} total true bboxes")
        # If none exists for this class then we can safely skip
        if total_true_bboxes == 0:
            continue

        for detection_idx, detection in enumerate(detections):
            # Only take out the ground_truths that have the same
            # training idx as detection
            ground_truth_img = [
                bbox for bbox in ground_truths if bbox[0] == detection[0]
            ]

            num_gts = len(ground_truth_img)
            #print(f"{c} class has {num_gts} ground truths for detection {detection_idx}")
            best_iou = 0

            for idx, gt in enumerate(ground_truth_img):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:])
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_iou > iou_threshold:
                # only detect ground truth detection once
                if amount_bboxes[detection[0]][best_gt_idx] == 0:
                    # true positive and add this bounding box to seen
                    TP[detection_idx] = 1
                    amount_bboxes[detection[0]][best_gt_idx] = 1
                else:
                    #These additional detections are considered false positives because they do not correspond to a new, unique object
                    #they're essentially "over-detecting" an object that has already been correctly identified.
                    FP[detection_idx] = 1

            # if IOU is lower then the detection is a false positive
            else:
                FP[detection_idx] = 1

        #[1, 1, 0, 1, 0] -> [1, 2, 2, 3, 3]
        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = torch.divide(TP_cumsum, (TP_cumsum + FP_cumsum + epsilon))
        precisions = torch.cat((torch.tensor([1]), precisions))
        recalls = torch.cat((torch.tensor([0]), recalls))
        # torch.trapz for numerical integration
        average_precisions.append(torch.trapz(precisions, recalls))

    return sum(average_precisions) / len(average_precisions)

### Plot image

In [44]:
def plot_image(image, boxes):
    """Plots predicted bounding boxes on the image"""
    im = np.array(image)
    height, width, _ = im.shape

    # Create figure and axes
    fig, ax = plt.subplots(1)
    # Display the image
    ax.imshow(im)

    # box[0] is x midpoint, box[2] is width
    # box[1] is y midpoint, box[3] is height

    # Create a Rectangle patch
    for box in boxes:
        class_label = int(box[0])
        box = box[2:]
        assert len(box) == 4, "Got more values than in x, y, w, h, in a box!"
        upper_left_x = box[0] - box[2] / 2
        upper_left_y = box[1] - box[3] / 2
        rect = patches.Rectangle(
            (upper_left_x * width, upper_left_y * height),
            box[2] * width,
            box[3] * height,
            linewidth=1,
            edgecolor="r",
            facecolor="none",
        )
        # Add the patch to the Axes
        ax.add_patch(rect)
        
        # Add class label text
        ax.text(upper_left_x * width, upper_left_y * height, str(class_label), color='r', fontsize=10, verticalalignment='bottom')

    plt.show()


### Get and convert boxes

In [45]:
def get_bboxes(
    loader,
    model,
    iou_threshold,
    threshold,
    device="cuda",
):
    all_pred_boxes = []
    all_true_boxes = []

    # make sure model is in eval before get bboxes
    model.eval()
    train_idx = 0

    for batch_idx, (x, labels) in enumerate(loader):
        x = x.to(device)
        with torch.no_grad():
            predictions = model(x)

        batch_size = x.shape[0]
        true_bboxes = cellboxes_to_boxes(labels)
        bboxes = cellboxes_to_boxes(predictions)

        for idx in range(batch_size):
            nms_boxes = non_max_suppression(
                bboxes[idx],
                iou_threshold=iou_threshold,
                threshold=threshold,
            )

            # # Activate only for test
            # if batch_idx == 0 and idx == 0:
            #    plot_image(x[idx].permute(1,2,0).to("cpu"), nms_boxes)

            for nms_box in nms_boxes:
                all_pred_boxes.append([train_idx] + nms_box)

            for box in true_bboxes[idx]:
                # many will get converted to 0 pred
                if box[1] > threshold:
                    all_true_boxes.append([train_idx] + box)

            train_idx += 1

    model.train()
    return all_pred_boxes, all_true_boxes


In [46]:
def convert_cellboxes(predictions, S=4, C=1):
    """
    Converts bounding boxes output from Yolo with
    an image split size of S into entire image ratios
    rather than relative to cell ratios.
    """
    predictions = predictions.to("cpu")
    batch_size = predictions.shape[0]
    predictions = predictions.reshape(batch_size, S, S, C + 10)
    bboxes1 = predictions[..., C + 1:C + 5]
    bboxes2 = predictions[..., C + 6:C + 10]
    
    scores = torch.cat(
        (predictions[..., C].unsqueeze(0), predictions[..., C + 5].unsqueeze(0)), dim=0
    )
    best_box = scores.argmax(0).unsqueeze(-1)
    best_boxes = bboxes1 * (1 - best_box) + best_box * bboxes2
    # This results in a tensor with shape (batch_size, 7, 7, 1) where each element represents the index of a grid cell.
    cell_indices = torch.arange(S).repeat(batch_size, S, 1).unsqueeze(-1)

    x = 1 / S * (best_boxes[..., :1] + cell_indices)
    # Permute because is used here to swap these indices to match the (x, y) convention used in the best_boxes tensor.
    # [0,1,2]->[0,0,0]
    # [0,1,2]->[1,1,1]
    # [0,1,2]->[2,2,2]
    y = 1 / S * (best_boxes[..., 1:2] + cell_indices.permute(0, 2, 1, 3))
    w_y = 1 / S * best_boxes[..., 2:4]

    converted_bboxes = torch.cat((x, y, w_y), dim=-1)
    predicted_class = predictions[..., :C].argmax(-1).unsqueeze(-1)
    best_confidence = torch.max(predictions[..., C], predictions[..., C + 5]).unsqueeze(
        -1
    )
    converted_preds = torch.cat(
        (predicted_class, best_confidence, converted_bboxes), dim=-1
    )
    #print(f"converted_preds: {converted_preds}")

    return converted_preds

In [47]:
def cellboxes_to_boxes(out, S=4):
    converted_pred = convert_cellboxes(out).reshape(out.shape[0], S * S, -1)
    converted_pred[..., 0] = converted_pred[..., 0].long()
    all_bboxes = []

    #iterate over each batch sample
    for ex_idx in range(out.shape[0]):
        bboxes = []
        #iterate over each grid in the grid cell
        for bbox_idx in range(S * S):
            bboxes.append([x.item() for x in converted_pred[ex_idx, bbox_idx, :]])
        all_bboxes.append(bboxes)
    #print(f"all_bboxes: {all_bboxes}")
    return all_bboxes

>   It returns a list containing bounding boxes for each example in the batch. Each bounding box is represented as a list of values '[x, y, width, height, confidence, class_probabilities]'

## Create Loader of Dataset

In [48]:


class DiorDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir, S=4, B=2, C=1, transform=None, train=True):
        self.root_dir = root_dir
        self.transform = transform
        self.S = S
        self.B = B
        self.C = C
        self.train = train

        # Determine the directory of the images and labels
        self.img_dir = os.path.join(self.root_dir, 'images')
        self.label_dir = os.path.join(self.root_dir, 'label')
        
        # if self.train:
        #     self.img_dir = os.path.join(self.img_dir, 'train')
        #     self.label_dir = os.path.join(self.label_dir, 'train')
        # else:
        #     self.img_dir = os.path.join(self.img_dir, 'test')
        #     self.label_dir = os.path.join(self.label_dir, 'test')
        
        

        self.img_ids = os.listdir(self.img_dir)

    def __len__(self):
        return len(self.img_ids)

    def __getitem__(self, index):
        img_id = self.img_ids[index].split('.')[0]
        boxes = []

        # Load image
        img_path = os.path.join(self.img_dir, img_id + '.jpg')
        image = Image.open(img_path)
        image = image.convert("RGB")

        # Load labels
        label_path = os.path.join(self.label_dir, img_id + '.txt')
        with open(label_path, 'r') as f:
            for line in f.readlines():
                class_label, x, y, width, height = map(float, line.strip().split())
                boxes.append([class_label, x, y, width, height])
        
        if len(boxes) > 3:
            boxes = boxes[:3]
        boxes = torch.tensor(boxes)
        #print(f"boxes: {boxes}")
        if self.transform:
            image, boxes = self.transform(image, boxes)

        # Convert To Cells
        label_matrix = torch.zeros((self.S, self.S, self.C + 5 * self.B))
        for box in boxes:
            class_label, x, y, width, height = box.tolist()
            class_label = int(class_label)

            i, j = int(self.S * y), int(self.S * x)
            x_cell, y_cell = self.S * x - j, self.S * y - i

            width_cell, height_cell = (
                width * self.S,
                height * self.S,
            )

            if label_matrix[i, j, self.C] == 0:
                label_matrix[i, j, self.C] = 1

                box_coordinates = torch.tensor(
                    [x_cell, y_cell, width_cell, height_cell]
                )

                label_matrix[i, j, self.C+1:self.C+5] = box_coordinates
                label_matrix[i, j, class_label] = 1
    
        #print(f"label_matrix shape: {label_matrix.shape}")

        return image, label_matrix

## YOLO Loss

From original paper: 
>   YOLO predicts multiple bounding boxes per grid cell. At training time we only want one bounding box predictor to be responsible for each object. We assign one predictor to be “responsible” for predicting an object based on which prediction has the highest current IOU with the ground truth. This leads to specialization between the bounding box predictors.
Each predictor gets better at predicting certain sizes, aspect ratios, or classes of object, improving overall recall. 

$$
\begin{gathered}
\lambda_{\text {coord }} \sum_{i=0}^{S^2} \sum_{j=0}^B \mathbb{1}_{i j}^{\text {obj }}\left[\left(x_i-\hat{x}_i\right)^2+\left(y_i-\hat{y}_i\right)^2\right] \\
+\lambda_{\text {coord }} \sum_{i=0}^{S^2} \sum_{j=0}^B \mathbb{1}_{i j}^{\text {obj }}\left[\left(\sqrt{w_i}-\sqrt{\hat{w}_i}\right)^2+\left(\sqrt{h_i}-\sqrt{\hat{h}_i}\right)^2\right] \\
+\sum_{i=0}^{S^2} \sum_{j=0}^B \mathbb{1}_{i j}^{\text {obj }}\left(C_i-\hat{C}_i\right)^2 \\
+\lambda_{\text {noobj }} \sum_{i=0}^{S^2} \sum_{j=0}^B \mathbb{1}_{i j}^{\text {noobj }}\left(C_i-\hat{C}_i\right)^2 \\
+\sum_{i=0}^{S^2} \mathbb{1}_i^{\text {obj }} \sum_{c \in \text { classes }}\left(p_i(c)-\hat{p}_i(c)\right)^2
\end{gathered}
$$

During training we optimize the following, multi-part where $ 1_{obj}^i $ denotes if object appears in cell **i** and $1_{obj}^{ij}$ denotes that the **j**  bounding box predictor in cell i is “responsible” for that prediction.

In every image many grid cells do not contain any object. This pushes the “confidence” scores of those cells towards zero, often overpowering the gradient from cells that do contain objects. This can lead to model instability, as the model may prioritize learning to predict empty cells rather than focusing on correctly detecting objects in cells containing them, causing training to diverge early on. To remedy this, we increase the loss from bounding box coordinate predictions and decrease the loss from confidence predictions for boxes that don’t contain objects. We use two parameters, $\lambda_{coord}$ and $\lambda_{noobj}$  to accomplish this.

Note that the loss function only penalizes classification error if an object is present in that grid cell (hence the conditional class probability discussed earlier). It also only penalizes bounding box coordinate error if that predictor is “responsible” for the ground truth box (i.e. has the highest
IOU of any predictor in that grid cell).

In [49]:
class YoloLoss(nn.Module):
    """
    Calculate the loss for yolo (v1) model
    """

    def __init__(self, S=4, B=2, C=1):
        super(YoloLoss, self).__init__()
        self.mse = nn.MSELoss(reduction="sum")

        """
        S is split size of image (in paper 7),
        B is number of boxes (in paper 2),
        C is number of classes (in paper 20, in dataset 3),
        """
        self.S = S
        self.B = B
        self.C = C

        # These are from Yolo paper, signifying how much we should
        # pay loss for no object (noobj) and the box coordinates (coord)
        self.lambda_noobj = 0.5
        self.lambda_coord = 5

    def forward(self, predictions, target):
        # predictions are shaped (BATCH_SIZE, S*S(C+B*5) when inputted
        predictions = predictions.reshape(-1, self.S, self.S, self.C + self.B * 5)

        # Calculate IoU for the two predicted bounding boxes with target bbox
        iou_b1 = intersection_over_union(predictions[..., self.C + 1:self.C + 5], target[..., self.C + 1:self.C + 5])
        iou_b2 = intersection_over_union(predictions[..., self.C + 6:self.C + 10], target[..., self.C + 1:self.C + 5])
        ious = torch.cat([iou_b1.unsqueeze(0), iou_b2.unsqueeze(0)], dim=0)

        # Take the box with highest IoU out of the two prediction
        # Note that bestbox will be indices of 0, 1 for which bbox was best
        iou_maxes, bestbox = torch.max(ious, dim=0)
        exists_box = target[..., self.C].unsqueeze(3)  # in paper this is Iobj_i

        # ======================== #
        #   FOR BOX COORDINATES    #
        # ======================== #

        # Set boxes with no object in them to 0. We only take out one of the two 
        # predictions, which is the one with highest Iou calculated previously.
        box_predictions = exists_box * (
            (
                bestbox * predictions[..., self.C + 6:self.C + 10]
                + (1 - bestbox) * predictions[..., self.C + 1:self.C + 5]
            )
        )
        #print(f"box_predictions: {box_predictions.shape}")
        box_targets = exists_box * target[..., self.C + 1:self.C + 5]

        # Take sqrt of width, height of boxes
        box_predictions[..., 2:4] = torch.sign(box_predictions[..., 2:4]) * torch.sqrt(
            torch.abs(box_predictions[..., 2:4] + 1e-6)
        )
        box_targets[..., 2:4] = torch.sqrt(box_targets[..., 2:4])

        box_loss = self.mse(
            torch.flatten(box_predictions, end_dim=-2),
            torch.flatten(box_targets, end_dim=-2),
        )

        # ==================== #
        #   FOR OBJECT LOSS    #
        # ==================== #

        # pred_box is the confidence score for the bbox with highest IoU
        pred_box = (
            bestbox * predictions[..., self.C + 5:self.C + 6] + (1 - bestbox) * predictions[..., self.C:self.C + 1]
        )

        object_loss = self.mse(
            torch.flatten(exists_box * pred_box),
            torch.flatten(exists_box * target[..., self.C:self.C + 1]),
        )

        # ======================= #
        #   FOR NO OBJECT LOSS    #
        # ======================= #

        #max_no_obj = torch.max(predictions[..., 20:21], predictions[..., 25:26])
        #no_object_loss = self.mse(
        #    torch.flatten((1 - exists_box) * max_no_obj, start_dim=1),
        #    torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1),
        #)

        no_object_loss = self.mse(
            torch.flatten((1 - exists_box) * predictions[..., self.C:self.C + 1], start_dim=1),
            torch.flatten((1 - exists_box) * target[..., self.C:self.C + 1], start_dim=1),
        )

        no_object_loss += self.mse(
            torch.flatten((1 - exists_box) * predictions[..., self.C + 5:self.C + 6], start_dim=1),
            torch.flatten((1 - exists_box) * target[..., self.C:self.C + 1], start_dim=1)
        )

        # ================== #
        #   FOR CLASS LOSS   #
        # ================== #

        class_loss = self.mse(
            torch.flatten(exists_box * predictions[..., :self.C], end_dim=-2,),
            torch.flatten(exists_box * target[..., :self.C], end_dim=-2,),
        )

        loss = (
            self.lambda_coord * box_loss  # first two rows in paper
            + object_loss  # third row in paper
            + self.lambda_noobj * no_object_loss  # forth row
            + class_loss  # fifth row
        )

        return loss

## Training

In [50]:
LEARNING_RATE = 2e-5
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 32 # 64 in original paper but resource exhausted error otherwise.
EPOCHS = 100
WEIGHT_DECAY = 0
LOAD_MODEL = False
LOAD_MODEL_FILE = "tinyissimoyolo.pth"

In [51]:
def train_fn(train_loader, model, optimizer, loss_fn):
    loop = tqdm(train_loader, leave=True)
    mean_loss = []
    
    for batch_idx, (x, y) in enumerate(loop):
        x, y = x.to(DEVICE), y.to(DEVICE)
        out = model(x)
        loss = loss_fn(out, y)
        mean_loss.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        loop.set_postfix(loss = loss.item())
        
    print(f"Mean loss was {sum(mean_loss) / len(mean_loss)}")

In [52]:
class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, bboxes):
        for t in self.transforms:
            img, bboxes = t(img), bboxes

        return img, bboxes


transform = Compose([transforms.Resize((88, 88)), transforms.ToTensor()])

### Combine all

In [53]:
files_dir = 'one_class_data'
model = TinyissimoYOLO().to(DEVICE)
optimizer = optim.Adam(
    model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, factor=0.1, patience=3, mode='max', verbose=True)
loss_fn = YoloLoss()


train_dataset = DiorDataset(
    root_dir=files_dir,
    transform=transform,
    train=True
)


# Define the length of the training set
train_len = int(0.8 * len(train_dataset))

# Define the length of the test set
test_len = len(train_dataset) - train_len

# Split the dataset
train_dataset, test_dataset = random_split(train_dataset, [train_len, test_len])

# Now you can create your DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False)



In [54]:
for epoch in range(EPOCHS):
    print(f"Epoch {epoch + 1}/{EPOCHS}")
    train_fn(train_loader, model, optimizer, loss_fn)
    
    pred_boxes, target_boxes = get_bboxes(
        train_loader, model, iou_threshold=0.5, threshold=0.4
    )
    #print(f"pred_boxes: {len(pred_boxes)}, target_boxes: {len(target_boxes)}")
    mean_avg_prec = mean_average_precision(
        pred_boxes, target_boxes, iou_threshold=0.5
    )
    print(f"Train mAP: {mean_avg_prec}")
    
    
    scheduler.step(mean_avg_prec)


Epoch 1/100


100%|██████████| 28/28 [00:03<00:00,  9.27it/s, loss=797]    


Mean loss was 932.8095158168247
Train mAP: 0.0
Epoch 2/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=770]    


Mean loss was 929.8528725760324
Train mAP: 0.0
Epoch 3/100


100%|██████████| 28/28 [00:02<00:00,  9.63it/s, loss=629]    


Mean loss was 923.1856711251395
Train mAP: 0.0
Epoch 4/100


100%|██████████| 28/28 [00:02<00:00,  9.44it/s, loss=758]    


Mean loss was 899.2420458112445
Train mAP: 9.708925972518045e-06
Epoch 5/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=737] 


Mean loss was 871.1229771205357
Train mAP: 0.0
Epoch 6/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=605]


Mean loss was 860.7674386160714
Train mAP: 0.0
Epoch 7/100


100%|██████████| 28/28 [00:02<00:00,  9.51it/s, loss=631]    


Mean loss was 851.9066750662668
Train mAP: 0.0
Epoch 8/100


100%|██████████| 28/28 [00:02<00:00,  9.40it/s, loss=751]


Mean loss was 831.8893367222378
Train mAP: 0.00010433792340336367
Epoch 9/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=599]


Mean loss was 819.7517329624721
Train mAP: 9.571461123414338e-05
Epoch 10/100


100%|██████████| 28/28 [00:02<00:00,  9.56it/s, loss=667]    


Mean loss was 813.0940878731864
Train mAP: 8.809951395960525e-06
Epoch 11/100


100%|██████████| 28/28 [00:02<00:00,  9.45it/s, loss=596]


Mean loss was 820.5854928152902
Train mAP: 0.0
Epoch 12/100


100%|██████████| 28/28 [00:02<00:00,  9.41it/s, loss=643]    


Mean loss was 805.7974722726004
Train mAP: 9.907360072247684e-05
Epoch 13/100


100%|██████████| 28/28 [00:02<00:00,  9.46it/s, loss=605]   


Mean loss was 799.2983856201172
Train mAP: 5.681711991201155e-05
Epoch 14/100


100%|██████████| 28/28 [00:02<00:00,  9.34it/s, loss=587]


Mean loss was 796.3561096191406
Train mAP: 6.042631866876036e-05
Epoch 15/100


100%|██████████| 28/28 [00:02<00:00,  9.39it/s, loss=653]


Mean loss was 794.486554827009
Train mAP: 6.042631866876036e-05
Epoch 16/100


100%|██████████| 28/28 [00:02<00:00,  9.60it/s, loss=772]


Mean loss was 793.1598379952567
Train mAP: 4.8673799028620124e-05
Epoch 17/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=768]


Mean loss was 792.2129930768695
Train mAP: 4.9124308134196326e-05
Epoch 18/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=619]


Mean loss was 792.1068747384207
Train mAP: 4.9124308134196326e-05
Epoch 19/100


100%|██████████| 28/28 [00:02<00:00,  9.60it/s, loss=645]   


Mean loss was 792.0411137172154
Train mAP: 4.9124308134196326e-05
Epoch 20/100


100%|██████████| 28/28 [00:02<00:00,  9.48it/s, loss=608]


Mean loss was 791.9239436558315
Train mAP: 4.9124308134196326e-05
Epoch 21/100


100%|██████████| 28/28 [00:02<00:00,  9.57it/s, loss=688]


Mean loss was 791.8348018101284
Train mAP: 4.9124308134196326e-05
Epoch 22/100


100%|██████████| 28/28 [00:02<00:00,  9.56it/s, loss=677]    


Mean loss was 791.8240291050503
Train mAP: 4.9124308134196326e-05
Epoch 23/100


100%|██████████| 28/28 [00:02<00:00,  9.52it/s, loss=630]


Mean loss was 791.8154623849051
Train mAP: 4.9124308134196326e-05
Epoch 24/100


100%|██████████| 28/28 [00:02<00:00,  9.50it/s, loss=596]    


Mean loss was 791.8084215436663
Train mAP: 4.9124308134196326e-05
Epoch 25/100


100%|██████████| 28/28 [00:02<00:00,  9.57it/s, loss=587]


Mean loss was 791.7969142368862
Train mAP: 4.9124308134196326e-05
Epoch 26/100


100%|██████████| 28/28 [00:02<00:00,  9.54it/s, loss=691]


Mean loss was 791.7960510253906
Train mAP: 4.9124308134196326e-05
Epoch 27/100


100%|██████████| 28/28 [00:02<00:00,  9.45it/s, loss=523]


Mean loss was 791.795414515904
Train mAP: 4.9124308134196326e-05
Epoch 28/100


100%|██████████| 28/28 [00:02<00:00,  9.54it/s, loss=651]


Mean loss was 791.7945796421596
Train mAP: 4.9124308134196326e-05
Epoch 29/100


100%|██████████| 28/28 [00:02<00:00,  9.50it/s, loss=582]


Mean loss was 791.793927873884
Train mAP: 4.9124308134196326e-05
Epoch 30/100


100%|██████████| 28/28 [00:02<00:00,  9.51it/s, loss=546]   


Mean loss was 791.793212890625
Train mAP: 4.9124308134196326e-05
Epoch 31/100


100%|██████████| 28/28 [00:02<00:00,  9.48it/s, loss=681]


Mean loss was 791.7925066266741
Train mAP: 4.9124308134196326e-05
Epoch 32/100


100%|██████████| 28/28 [00:02<00:00,  9.57it/s, loss=602]   


Mean loss was 791.7918156215122
Train mAP: 4.9124308134196326e-05
Epoch 33/100


100%|██████████| 28/28 [00:02<00:00,  9.54it/s, loss=726]


Mean loss was 791.7910831996372
Train mAP: 4.9124308134196326e-05
Epoch 34/100


100%|██████████| 28/28 [00:02<00:00,  9.48it/s, loss=657]


Mean loss was 791.7903137207031
Train mAP: 4.9124308134196326e-05
Epoch 35/100


100%|██████████| 28/28 [00:02<00:00,  9.48it/s, loss=652]


Mean loss was 791.7895791190011
Train mAP: 4.9124308134196326e-05
Epoch 36/100


100%|██████████| 28/28 [00:02<00:00,  9.59it/s, loss=491]


Mean loss was 791.7888172694614
Train mAP: 4.9124308134196326e-05
Epoch 37/100


100%|██████████| 28/28 [00:02<00:00,  9.52it/s, loss=598]


Mean loss was 791.7881578717913
Train mAP: 4.9124308134196326e-05
Epoch 38/100


100%|██████████| 28/28 [00:02<00:00,  9.45it/s, loss=661]


Mean loss was 791.7873949323382
Train mAP: 4.9124308134196326e-05
Epoch 39/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=770]


Mean loss was 791.78662109375
Train mAP: 4.9124308134196326e-05
Epoch 40/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=634]    


Mean loss was 791.7858123779297
Train mAP: 4.9124308134196326e-05
Epoch 41/100


100%|██████████| 28/28 [00:02<00:00,  9.56it/s, loss=619]


Mean loss was 791.7851235525949
Train mAP: 4.9124308134196326e-05
Epoch 42/100


100%|██████████| 28/28 [00:02<00:00,  9.58it/s, loss=623]


Mean loss was 791.7843497140067
Train mAP: 4.9124308134196326e-05
Epoch 43/100


100%|██████████| 28/28 [00:02<00:00,  9.54it/s, loss=541]


Mean loss was 791.7836608886719
Train mAP: 4.9124308134196326e-05
Epoch 44/100


100%|██████████| 28/28 [00:02<00:00,  9.57it/s, loss=526]


Mean loss was 791.7827933175223
Train mAP: 4.9124308134196326e-05
Epoch 45/100


100%|██████████| 28/28 [00:02<00:00,  9.55it/s, loss=501]


Mean loss was 791.7820761544364
Train mAP: 4.9124308134196326e-05
Epoch 46/100


100%|██████████| 28/28 [00:02<00:00,  9.46it/s, loss=681]   


Mean loss was 791.7813088553293
Train mAP: 4.9124308134196326e-05
Epoch 47/100


100%|██████████| 28/28 [00:02<00:00,  9.56it/s, loss=612]    


Mean loss was 791.7806745256696
Train mAP: 4.9124308134196326e-05
Epoch 48/100


100%|██████████| 28/28 [00:02<00:00,  9.48it/s, loss=580]    


Mean loss was 791.7798483712332
Train mAP: 4.9124308134196326e-05
Epoch 49/100


100%|██████████| 28/28 [00:02<00:00,  9.54it/s, loss=702]


Mean loss was 791.7789328438895
Train mAP: 4.9124308134196326e-05
Epoch 50/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=591]


Mean loss was 791.7782483782087
Train mAP: 4.9124308134196326e-05
Epoch 51/100


100%|██████████| 28/28 [00:02<00:00,  9.55it/s, loss=674]   


Mean loss was 791.7773917061942
Train mAP: 4.9124308134196326e-05
Epoch 52/100


100%|██████████| 28/28 [00:02<00:00,  9.42it/s, loss=690]


Mean loss was 791.776633126395
Train mAP: 4.9124308134196326e-05
Epoch 53/100


100%|██████████| 28/28 [00:02<00:00,  9.51it/s, loss=680]


Mean loss was 791.7758135114398
Train mAP: 4.9124308134196326e-05
Epoch 54/100


100%|██████████| 28/28 [00:02<00:00,  9.51it/s, loss=737]


Mean loss was 791.7750418526786
Train mAP: 4.9124308134196326e-05
Epoch 55/100


100%|██████████| 28/28 [00:03<00:00,  9.31it/s, loss=768]


Mean loss was 791.7743835449219
Train mAP: 4.9124308134196326e-05
Epoch 56/100


100%|██████████| 28/28 [00:02<00:00,  9.46it/s, loss=532]


Mean loss was 791.7735225132534
Train mAP: 4.9124308134196326e-05
Epoch 57/100


100%|██████████| 28/28 [00:02<00:00,  9.49it/s, loss=668]


Mean loss was 791.772722516741
Train mAP: 4.9124308134196326e-05
Epoch 58/100


100%|██████████| 28/28 [00:02<00:00,  9.63it/s, loss=569]


Mean loss was 791.7719029017857
Train mAP: 4.9124308134196326e-05
Epoch 59/100


100%|██████████| 28/28 [00:02<00:00,  9.55it/s, loss=648]    


Mean loss was 791.7711900983538
Train mAP: 4.9124308134196326e-05
Epoch 60/100


100%|██████████| 28/28 [00:02<00:00,  9.56it/s, loss=707]    


Mean loss was 791.7703116280692
Train mAP: 4.9124308134196326e-05
Epoch 61/100


100%|██████████| 28/28 [00:02<00:00,  9.49it/s, loss=597]


Mean loss was 791.7696446010044
Train mAP: 4.9124308134196326e-05
Epoch 62/100


100%|██████████| 28/28 [00:02<00:00,  9.47it/s, loss=636]    


Mean loss was 791.7687683105469
Train mAP: 4.9124308134196326e-05
Epoch 63/100


100%|██████████| 28/28 [00:02<00:00,  9.50it/s, loss=578]


Mean loss was 791.7679639543805
Train mAP: 4.9124308134196326e-05
Epoch 64/100


100%|██████████| 28/28 [00:02<00:00,  9.52it/s, loss=634]    


Mean loss was 791.7671857561384
Train mAP: 4.9124308134196326e-05
Epoch 65/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=683]    


Mean loss was 791.7663334437779
Train mAP: 4.9124308134196326e-05
Epoch 66/100


100%|██████████| 28/28 [00:02<00:00,  9.52it/s, loss=451]


Mean loss was 791.7656108311245
Train mAP: 4.9124308134196326e-05
Epoch 67/100


100%|██████████| 28/28 [00:02<00:00,  9.54it/s, loss=671]


Mean loss was 791.7647487095425
Train mAP: 4.9124308134196326e-05
Epoch 68/100


100%|██████████| 28/28 [00:02<00:00,  9.48it/s, loss=608]


Mean loss was 791.7640359061105
Train mAP: 4.9124308134196326e-05
Epoch 69/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=625]


Mean loss was 791.7632489885602
Train mAP: 4.9124308134196326e-05
Epoch 70/100


100%|██████████| 28/28 [00:02<00:00,  9.54it/s, loss=612]


Mean loss was 791.7623835972378
Train mAP: 4.9124308134196326e-05
Epoch 71/100


100%|██████████| 28/28 [00:02<00:00,  9.50it/s, loss=743]


Mean loss was 791.7616446358817
Train mAP: 4.9124308134196326e-05
Epoch 72/100


100%|██████████| 28/28 [00:02<00:00,  9.51it/s, loss=782]


Mean loss was 791.7607792445591
Train mAP: 4.9124308134196326e-05
Epoch 73/100


100%|██████████| 28/28 [00:02<00:00,  9.50it/s, loss=621]


Mean loss was 791.7599966866629
Train mAP: 4.9124308134196326e-05
Epoch 74/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=531]    


Mean loss was 791.7591705322266
Train mAP: 4.9124308134196326e-05
Epoch 75/100


100%|██████████| 28/28 [00:02<00:00,  9.51it/s, loss=682]


Mean loss was 791.7587323869977
Train mAP: 4.9124308134196326e-05
Epoch 76/100


100%|██████████| 28/28 [00:03<00:00,  9.32it/s, loss=700]


Mean loss was 791.7574179513114
Train mAP: 4.9124308134196326e-05
Epoch 77/100


100%|██████████| 28/28 [00:02<00:00,  9.51it/s, loss=610]


Mean loss was 791.7567487444196
Train mAP: 4.9124308134196326e-05
Epoch 78/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=590]


Mean loss was 791.7558986118862
Train mAP: 4.9124308134196326e-05
Epoch 79/100


100%|██████████| 28/28 [00:02<00:00,  9.57it/s, loss=644]


Mean loss was 791.7550048828125
Train mAP: 4.9124308134196326e-05
Epoch 80/100


100%|██████████| 28/28 [00:02<00:00,  9.58it/s, loss=644] 


Mean loss was 791.7542354038784
Train mAP: 4.9124308134196326e-05
Epoch 81/100


100%|██████████| 28/28 [00:02<00:00,  9.47it/s, loss=736]


Mean loss was 791.753411429269
Train mAP: 4.9124308134196326e-05
Epoch 82/100


100%|██████████| 28/28 [00:02<00:00,  9.51it/s, loss=537]


Mean loss was 791.7526266915457
Train mAP: 4.9124308134196326e-05
Epoch 83/100


100%|██████████| 28/28 [00:02<00:00,  9.48it/s, loss=601]


Mean loss was 791.7517918178013
Train mAP: 4.9124308134196326e-05
Epoch 84/100


100%|██████████| 28/28 [00:02<00:00,  9.51it/s, loss=515]


Mean loss was 791.7510419573102
Train mAP: 4.9124308134196326e-05
Epoch 85/100


100%|██████████| 28/28 [00:02<00:00,  9.52it/s, loss=608]


Mean loss was 791.7505449567523
Train mAP: 4.9124308134196326e-05
Epoch 86/100


100%|██████████| 28/28 [00:02<00:00,  9.49it/s, loss=584]


Mean loss was 791.7492980957031
Train mAP: 4.9124308134196326e-05
Epoch 87/100


100%|██████████| 28/28 [00:02<00:00,  9.63it/s, loss=673]


Mean loss was 791.7485220772879
Train mAP: 4.9124308134196326e-05
Epoch 88/100


100%|██████████| 28/28 [00:02<00:00,  9.58it/s, loss=632]


Mean loss was 791.7478550502232
Train mAP: 4.9124308134196326e-05
Epoch 89/100


100%|██████████| 28/28 [00:02<00:00,  9.54it/s, loss=633]


Mean loss was 791.7468588692801
Train mAP: 4.9124308134196326e-05
Epoch 90/100


100%|██████████| 28/28 [00:02<00:00,  9.54it/s, loss=615]


Mean loss was 791.7460828508649
Train mAP: 4.9124308134196326e-05
Epoch 91/100


100%|██████████| 28/28 [00:02<00:00,  9.58it/s, loss=563]


Mean loss was 791.7452043805804
Train mAP: 4.9124308134196326e-05
Epoch 92/100


100%|██████████| 28/28 [00:02<00:00,  9.58it/s, loss=575]


Mean loss was 791.7445395333426
Train mAP: 4.9124308134196326e-05
Epoch 93/100


100%|██████████| 28/28 [00:02<00:00,  9.49it/s, loss=692]


Mean loss was 791.7435302734375
Train mAP: 4.9124308134196326e-05
Epoch 94/100


100%|██████████| 28/28 [00:02<00:00,  9.56it/s, loss=564]


Mean loss was 791.7426910400391
Train mAP: 4.9124308134196326e-05
Epoch 95/100


100%|██████████| 28/28 [00:02<00:00,  9.52it/s, loss=518]


Mean loss was 791.7418409075055
Train mAP: 4.9124308134196326e-05
Epoch 96/100


100%|██████████| 28/28 [00:02<00:00,  9.53it/s, loss=600]    


Mean loss was 791.7410147530692
Train mAP: 4.9124308134196326e-05
Epoch 97/100


100%|██████████| 28/28 [00:02<00:00,  9.54it/s, loss=583]


Mean loss was 791.7401907784598
Train mAP: 4.9124308134196326e-05
Epoch 98/100


100%|██████████| 28/28 [00:02<00:00,  9.55it/s, loss=803]


Mean loss was 791.7394038609096
Train mAP: 4.9124308134196326e-05
Epoch 99/100


100%|██████████| 28/28 [00:02<00:00,  9.56it/s, loss=592]


Mean loss was 791.7385668073382
Train mAP: 4.9124308134196326e-05
Epoch 100/100


100%|██████████| 28/28 [00:02<00:00,  9.57it/s, loss=516]


Mean loss was 791.7378387451172
Train mAP: 4.9124308134196326e-05


In [55]:
torch.save(model.state_dict(), "tinyissimoyolo.pth")

## Testing

### Load model and make inference

In [56]:

checkpoint = torch.load("tinyissimoyolo.pth")
# Load the state dictionary from the .pth file

# Load the state dictionary into the model
model.load_state_dict(checkpoint)

# Ensure the model is in evaluation mode
model.eval()


TinyissimoYOLO(
  (layer1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): SquareActivation()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): SquareActivation()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (layer3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): SquareActivation()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (layer4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): SquareActivation()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (fclayers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=3200, out_features=256, bias=True)
    (2): SquareActivation()
    (3): Linear(in_features=256, out_features=176, bias=True)
  )
)

In [57]:
model.eval()

with torch.no_grad():
    pred_boxes, target_boxes = get_bboxes(
        test_loader, model, iou_threshold=0.5, threshold=0.4
    )

    mean_avg_prec = mean_average_precision(
        pred_boxes, target_boxes, iou_threshold=0.5
    )
    print(f"Test mAP: {mean_avg_prec}")

Test mAP: 0.0
