# YOLO Inference On Taco Images

- TACO: (Trash Annotations in Context)

* A trained model is required. It must:
    - Be located in the ./checkpoints dir.
    - Have its filename set in config.yaml, ex: "yolo_v1_taco_D_2025-07-08_EPOCH_50_LOSS_2.1525_S_448.pt".
* Run the YOLO model on custom images.


In [None]:
import torch
import torchvision.transforms as T
from argparse import Namespace
import torch.optim as optim
from yolov1 import Train
from utils.checkpoints import load_checkpoint
from yolov1 import YOLOv1
# When printing tensors, show all values instead of a truncated summary. Only use in Jupyter Notebook.
torch.set_printoptions(threshold=torch.inf)
# Seed torch's random number generator with a fixed value.
torch.manual_seed(1)

In [None]:
# For Jupyter Notebook: reload external modules when their code changes.
%load_ext autoreload
%autoreload 1 
# Autoreload modes:
#   0: Disables automatic reloading (default setting).
#   1: Reloads only modules imported using the %aimport magic command.
#   2: Reloads all modules (except those explicitly excluded by %aimport).

# `%aimport <module name>` registers a module; with mode 1 only those modules are reloaded.

### Add Configurations

In [None]:
%aimport utils.load_config
from utils.load_config import load_config

# Load the project configuration (presumably read from config.yaml -- confirm in utils.load_config).
config = load_config()
# Bare expression so the notebook displays the loaded configuration.
config

### Add Transforms

In [None]:
# <------------- Transforms ------------->
class Compose:
    """Chain paired transforms, threading (image, bboxes) through each one in order."""

    def __init__(self, transforms):
        # Ordered callables; each accepts (img, bboxes) and returns the same pair.
        self.transforms = transforms

    def __call__(self, img, bboxes):
        sample = (img, bboxes)
        for step in self.transforms:
            # Unpack explicitly so every step must hand back exactly two values.
            image, boxes = step(*sample)
            sample = (image, boxes)
        return sample


class Resize(object):
    """Resize the image to a fixed size and pass the bboxes through untouched.

    The bboxes need no adjustment because they are normalized (0-1), so they
    stay valid at any image resolution.
    """

    def __init__(self, size):
        # Forwarded as-is to torchvision's T.Resize, which reads a sequence as
        # (height, width) -- not (width, height) -- e.g. (448, 448).
        self.size = size

    def __call__(self, img, bboxes):
        resized = T.Resize(self.size)(img)
        return resized, bboxes  # bboxes stay the same


class ToTensor:
    """Turn the image into a tensor; the bboxes are returned unchanged."""

    def __call__(self, img, bboxes):
        # torchvision's ToTensor also scales the pixel values into the 0-1 range.
        to_tensor = T.ToTensor()
        return to_tensor(img), bboxes


transforms = Compose(
    # Preprocessing pipeline: resize the image, then convert it to a tensor.
    # The bboxes pass through both steps unchanged.
    [
        Resize((448, 448)),  # Resize image to 448x448
        ToTensor(),  # Convert image to tensor
    ]
)

### Load Model

In [None]:
# Build the network from the configured S, B and C and move it to the configured device.
yolo = YOLOv1(in_channels=3, S=config.S, B=config.B, C=config.C).to(config.DEVICE)

# The optimizer is never stepped in this notebook; it exists because
# load_checkpoint is called with it.
optimizer = optim.Adam(
    yolo.parameters(), lr=config.LEARNING_RATE, weight_decay=config.WEIGHT_DECAY
)
if config.CON_TRAINING:
    load_checkpoint(file_name=config.LOAD_MODEL_FILE, yolo=yolo, optimizer=optimizer)
else:
    # No checkpoint means the network keeps its initial weights; say so rather
    # than silently producing meaningless predictions.
    print(
        "WARNING: config.CON_TRAINING is disabled, so no checkpoint was loaded; "
        "predictions will come from an untrained model."
    )


### Load Dataset

In [None]:
from yolov1.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision.transforms.functional import to_pil_image

# Uncomment the next line to override the split selected in the config:
# config.MODE = "test"
# Build the dataset for the configured mode, passing in the transforms pipeline defined above.
dataset = Dataset(S=config.S, B=config.B, C=config.C, mode=config.MODE, dataset_path=config.DATASET_DIR, transforms=transforms)


In [None]:
# Load the image at the given index from the dataframe csv. Depending on
# config.MODE, the dataframe will either be train.csv, valid.csv or test.csv.
# Index with [] rather than calling the __getitem__ dunder directly.
img_t, label_t = dataset[3]
# Show both shapes as the cell output.
img_t.shape, label_t.shape

In [None]:
# Convert the image tensor back to a PIL image; it is displayed here and reused by the plotting cell.
img = to_pil_image(img_t)
img

### Predict

In [None]:
# Add a batch dimension to the img tensor so that it can be fed to the model. The YOLO model expects shape (BATCH_SIZE, img_channel_size, Img_Size, Img_Size).
# NOTE(review): re-running this cell adds another leading dimension each time; reload the sample first.
img_t = img_t.unsqueeze(0)
img_t.shape

In [None]:
# Switch to evaluation mode so layers that behave differently during training
# (e.g. dropout, batch norm) use their inference behavior.
yolo.eval()
# No gradients are needed for prediction, so skip autograd tracking.
with torch.no_grad():
    out = yolo(img_t.to(config.DEVICE))
out, out.shape

### Reshape Output
* Reshape the output from (1, 1372) -> (1, 7, 7, 28) -> (7, 7, 28).
    - Remove the batch dimension because we are only working with one image.

In [None]:
# Reshape the flat prediction into (batch, S, S, depth). The grid size comes
# from the config and the per-cell depth is inferred, so the reshape is not
# tied to the 7x7x28 case.
out = out.view(1, config.S, config.S, -1)
# Drop only the batch dimension; a bare squeeze() would remove every size-1 dimension.
out = out.squeeze(0)
out.shape

### Extract Bounding Boxes

In [None]:
%aimport utils.bboxes

from utils.bboxes import extract_bboxes, reconstruct_tensor

# Turn the grid-shaped prediction into one row per bounding box.
pred_bboxes = extract_bboxes(out, config) # (N, 9) [ i, j, b, class_idx, pc, x, y, w, h]
pred_bboxes.shape

### Compute Non-Max-Suppression
* Remove redundant bounding boxes from the model's predictions.

In [None]:
%aimport utils.nms

from utils.nms import non_max_suppression

In [None]:
# Run non-max suppression on the predicted boxes; config is passed through
# (presumably for its thresholds -- confirm in utils.nms).
nms_bboxes = non_max_suppression(
    pred_bboxes=pred_bboxes,
    config=config
)
# The bboxes from the model's prediction that survived NMS.
nms_bboxes.shape 

### Plot The Predicted And True Bounding Boxes

In [None]:
%aimport utils.plot
from utils.plot import plot_bboxes

In [None]:
# Move the ground-truth label tensor to the configured device, where the prediction was computed.
label_t = label_t.to(config.DEVICE)
# Extract the label's boxes with the same helper that was used for the prediction.
label_bboxes = extract_bboxes(label_t, config)

In [None]:
# Keep only the rows whose column 4 equals 1 (column 4 is `pc` in the extract_bboxes layout noted earlier).
label_filter = label_bboxes[label_bboxes[:, 4] == 1]
label_filter # The bboxes from the label where an object is.

In [None]:
# Plot the image with the ground-truth boxes and the predictions that survived NMS.
plot_bboxes(img=img, label_bboxes=label_filter, pred_bboxes=nms_bboxes, S=config.S)

### Upload Image