<center><h1>Plotting Visual Detection Results at Low Altitudes</h1>
<h2>Matthias Bartolo</h2>

</center>


#### Package Imports

In [1]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
import cv2
import numpy as np
import torch
import torchvision
import random
import os
import requests
from PIL import Image
from torchvision.models.detection.retinanet import RetinaNetClassificationHead
from torchvision.models.detection.fcos import FCOSClassificationHead
from torchvision.models.detection.ssd import SSDClassificationHead
from torchvision.models.detection.ssdlite import SSDLiteClassificationHead
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from functools import partial
import matplotlib.pyplot as plt
import copy

# Number of image channels fed to the detectors and number of output classes
# (the background entry counts as a class).
NUM_CHANNELS = 3
NUM_CLASSES = 2
# Per-class drawing colors, indexed by class ID.
COLORS = np.array([[0, 0, 0], [80, 150, 80]])  # Background is black, Litter is a muted green
# Human-readable class names, indexed by class ID.
class_names = ['__background__', 'Litter']
# NOTE(review): not referenced by any cell visible in this notebook - confirm before removing.
image_results = {}
# Threshold value handed to predict() by the model cells below.
iou_threshold = 0.5

#### Prediction and Visualization Functions


In [2]:
def predict(input_tensor, model, device, detection_threshold, iou_threshold=0.5):
    """
    Run the detector on one image, keep confident detections and apply
    Non-Maximum Suppression (NMS).

    Only the first entry of the model output (``outputs[0]``) is read, so any
    additional images in the batch are ignored.

    Args:
        input_tensor: Input tensor [batch_size, C, H, W]
        model: The detection model
        device: Device to run on (cpu/cuda)
        detection_threshold: Minimum score to keep a detection
        iou_threshold: IOU threshold for NMS (class-agnostic: boxes of
            different classes can suppress each other)

    Returns:
        tuple: ``(boxes, classes, labels, indices, scores)`` where
            boxes   - int32 array of shape (N, 4), coordinates truncated to int
            classes - list of N class-name strings (looked up in the
                      module-level ``class_names``)
            labels  - array of N class IDs
            indices - array of N positions, relative to the score-filtered
                      detections, that survived NMS
            scores  - array of N confidence scores
        When no detection passes ``detection_threshold`` every element is
        empty but has the same type as in the non-empty case.
    """

    model.eval()
    with torch.no_grad():
        outputs = model(input_tensor.to(device))

    detections = outputs[0]
    pred_labels = detections['labels'].cpu()
    pred_scores = detections['scores'].cpu()
    pred_boxes = detections['boxes'].cpu()

    # Apply detection threshold
    keep = pred_scores >= detection_threshold
    pred_boxes = pred_boxes[keep]
    pred_labels = pred_labels[keep]
    pred_scores = pred_scores[keep]

    if pred_boxes.nelement() == 0:
        # No predictions above threshold: return empty containers of the same
        # types as the normal path so callers can treat both cases alike.
        return (
            np.zeros((0, 4), dtype=np.int32),
            [],
            pred_labels.numpy(),
            np.zeros((0,), dtype=np.int64),
            pred_scores.numpy(),
        )

    # Apply NMS
    nms_indices = torchvision.ops.nms(pred_boxes, pred_scores, iou_threshold)

    # Select only NMS indices
    pred_boxes = pred_boxes[nms_indices]
    pred_labels = pred_labels[nms_indices]
    pred_scores = pred_scores[nms_indices]

    # Collect results
    boxes = pred_boxes.numpy().astype(np.int32)
    labels = pred_labels.numpy()
    classes = [class_names[label] for label in labels]
    indices = nms_indices.numpy()
    scores = pred_scores.numpy()

    return boxes, classes, labels, indices, scores

def draw_boxes(boxes, labels, class_names, scores, image, font_scale=3, box_thickness=15, resize_size=(2048, 1080)):
    """
    Draws bounding boxes with a "<name> <score>%" caption on a copy of an image.

    Args:
        boxes (sequence): Bounding box coordinates [x_min, y_min, x_max, y_max].
        labels (sequence): Class index of each box; selects the color from COLORS.
        class_names (sequence): Caption name of each box, parallel to ``boxes``
            (entry ``i`` belongs to box ``i``; it is NOT indexed by class ID).
        scores (sequence): Confidence of each box in [0, 1]; shown as a percentage.
        image (np.ndarray): Image to draw on; it is copied, not modified.
        font_scale (int, optional): Font size for text. Default is 3.
        box_thickness (int, optional): Thickness of bounding boxes. Default is 15.
        resize_size (tuple, optional): Final image size as (width, height).
            Default is (2048, 1080).

    Returns:
        np.ndarray: Resized image with boxes and labels drawn.
    """
    output_image = image.copy()
    img_height, img_width = output_image.shape[:2]
    font = cv2.FONT_HERSHEY_SIMPLEX
    padding = 15

    for i, box in enumerate(boxes):
        x_min, y_min, x_max, y_max = map(int, box)

        # Create the label with score and class name
        caption = f"{class_names[i]} {scores[i] * 100:.2f}%"

        # Assign color to the bounding box based on class index
        color = tuple(COLORS[labels[i] % len(COLORS)].tolist())

        # Draw the bounding box
        cv2.rectangle(output_image, (x_min, y_min), (x_max, y_max), color, box_thickness)

        # Calculate the size of the padded label background
        (text_width, text_height), _ = cv2.getTextSize(caption, font, font_scale, 2)
        text_width += 2 * padding
        text_height += 2 * padding

        # The label normally sits just above the box, left-aligned with it.
        # If it would leave the image at the top or right edge, slide it back
        # inside so the caption stays readable.
        label_left = x_min
        if label_left + text_width > img_width:
            label_left = max(0, img_width - text_width)
        label_top = y_min - text_height - 10
        y_shift = max(0, -label_top)

        top_left = (label_left, label_top + y_shift)
        bottom_right = (label_left + text_width, y_min - 5 + y_shift)

        # Draw the background for the text (to make it stand out)
        cv2.rectangle(output_image, top_left, bottom_right, color, -1)

        # Place the label text on the image
        text_position = (label_left + padding, y_min - padding - 10 + y_shift)
        cv2.putText(output_image, caption, text_position, font,
                    font_scale, (255, 255, 255), 6, lineType=cv2.LINE_AA)

    # Resize the image for final output
    output_image = cv2.resize(output_image, resize_size)
    return output_image

#### Loading Images

In [3]:
# Directory with image paths
image_dir = '../Assets/test_images/SODA/'# BDW
save_dir = '../Assets/predictions/SODA/' # BDW

# Load all the image paths in the directory.
# os.listdir() returns entries in arbitrary order, so sort them to make the
# prediction_{idx} file names map to the same image on every run and platform.
image_paths = sorted(os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith('.jpg'))

# Device and transform do not depend on the image, so build them once
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])

# Model inputs (each of shape [1, C, H, W], on `device`) and the matching RGB
# uint8 images used for drawing, stored in the same order
image_tensors = []
original_images_np = []

# Loop over all image paths and process each image
for image_path in image_paths:
    # Read the image; cv2.imread returns None instead of raising on failure
    image = cv2.imread(image_path)
    if image is None:
        print(f"Skipping unreadable image: {image_path}")
        continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # In case of BDW images, resize to 1280x1280
    if 'BDW' in image_path:
        image = cv2.resize(image, (1280, 1280))

    # Convert to a float tensor in [0, 1], add a batch dimension, move to device
    input_tensor = transform(Image.fromarray(image)).unsqueeze(0).to(device)

    # Add the image tensor and the untouched RGB image to the lists
    image_tensors.append(input_tensor)
    original_images_np.append(image)

#### RetinaNet Model

In [4]:
# Build a RetinaNet with the pretrained architecture; its weights are replaced
# by the fine-tuned checkpoint loaded further down
weights = torchvision.models.detection.RetinaNet_ResNet50_FPN_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.retinanet_resnet50_fpn(weights=weights)
model = pre_trained_model

# Get the number of input features for the classification head
in_features = model.head.classification_head.cls_logits.in_channels
num_anchors = model.head.classification_head.num_anchors

# Replace the classification head so it predicts NUM_CLASSES classes
model.head.classification_head = RetinaNetClassificationHead(
    in_channels=in_features,
    num_classes=NUM_CLASSES,
    num_anchors=num_anchors,
    norm_layer=partial(torch.nn.GroupNorm, 32)
)

# Move the complete model (including the new head) to the device once
model = model.to(device)

# map_location lets a checkpoint that was saved on GPU load on a CPU-only machine
checkpoint_path = '../runs/SODA 01m/RetinaNet/RetinaNet_rgb_binary_student1/weights/best.pth'
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()

###############################
save_dir_path = os.path.join(save_dir, 'RetinaNet')
os.makedirs(save_dir_path, exist_ok=True)

# Minimum confidence for a detection to be drawn. Previously iou_threshold was
# passed positionally into this slot; the value (0.5) is unchanged.
detection_threshold = 0.5

# Iterate through all input tensors
for idx, (input_tensor, orig_img_np) in enumerate(zip(image_tensors, original_images_np)):

    # Model prediction (keywords keep the two thresholds from being mixed up)
    boxes, classes, labels, indices, scores = predict(
        input_tensor, model, device,
        detection_threshold=detection_threshold,
        iou_threshold=iou_threshold,
    )

    # Draw boxes on a copy of the original image
    result_image = draw_boxes(boxes, labels, classes, scores, orig_img_np.copy())

    # Save the result; cv2.imwrite reports failure via its return value
    save_path = os.path.join(save_dir_path, f'prediction_{idx}.jpg')
    if cv2.imwrite(save_path, cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR)):
        print(f"Saved: {save_path}")
    else:
        print(f"Failed to save: {save_path}")


Saved: ../Assets/predictions/SODA/RetinaNet\prediction_0.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_1.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_2.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_3.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_4.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_5.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_6.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_7.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_8.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_9.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_10.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_11.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_12.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_13.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_14.jpg
Saved: ../Assets/predictions/SODA/RetinaNet\prediction_15.jpg
Saved: ../Assets/p

#### FCOS Model

In [5]:
# Build an FCOS model with the pretrained architecture; its weights are
# replaced by the fine-tuned checkpoint loaded further down
weights = torchvision.models.detection.FCOS_ResNet50_FPN_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.fcos_resnet50_fpn(weights=weights)
model = pre_trained_model

# Replace the first convolutional layer so it accepts NUM_CHANNELS input channels
model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# Initialize the first convolutional layer's weights
torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# Get the number of input channels from the classification head
in_features = model.head.classification_head.cls_logits.in_channels
num_anchors = model.head.classification_head.num_anchors

# Replace the classification head so it predicts NUM_CLASSES classes
model.head.classification_head = FCOSClassificationHead(
    in_channels=in_features,
    num_classes=NUM_CLASSES,
    num_anchors=num_anchors,
    norm_layer=partial(torch.nn.GroupNorm, 32)
)

# Move the complete model (including the new layers) to the device once
model = model.to(device)

# map_location lets a checkpoint that was saved on GPU load on a CPU-only machine
checkpoint_path = '../runs/SODA 01m/FCOS/FCOS_rgb_binary_student1/weights/best.pth'
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()

###############################
save_dir_path = os.path.join(save_dir, 'FCOS')
os.makedirs(save_dir_path, exist_ok=True)

# Minimum confidence for a detection to be drawn. Previously iou_threshold was
# passed positionally into this slot; the value (0.5) is unchanged.
detection_threshold = 0.5

# Iterate through all input tensors
for idx, (input_tensor, orig_img_np) in enumerate(zip(image_tensors, original_images_np)):

    # Model prediction (keywords keep the two thresholds from being mixed up)
    boxes, classes, labels, indices, scores = predict(
        input_tensor, model, device,
        detection_threshold=detection_threshold,
        iou_threshold=iou_threshold,
    )

    # Draw boxes on a copy of the original image
    result_image = draw_boxes(boxes, labels, classes, scores, orig_img_np.copy())

    # Save the result; cv2.imwrite reports failure via its return value
    save_path = os.path.join(save_dir_path, f'prediction_{idx}.jpg')
    if cv2.imwrite(save_path, cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR)):
        print(f"Saved: {save_path}")
    else:
        print(f"Failed to save: {save_path}")

Saved: ../Assets/predictions/SODA/FCOS\prediction_0.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_1.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_2.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_3.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_4.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_5.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_6.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_7.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_8.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_9.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_10.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_11.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_12.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_13.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_14.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_15.jpg
Saved: ../Assets/predictions/SODA/FCOS\prediction_16.jpg
Saved: ../Assets/predictions/SODA/FCOS\pr

#### Faster R-CNN Model

In [6]:
# Build a Faster R-CNN with the pretrained architecture; its weights are
# replaced by the fine-tuned checkpoint loaded further down
weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)
model = pre_trained_model

# Replace the first convolutional layer so it accepts NUM_CHANNELS input channels
model.backbone.body.conv1 = torch.nn.Conv2d(NUM_CHANNELS, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# Initialize the first convolutional layer's weights (was not working with the default initialization)
torch.nn.init.kaiming_normal_(model.backbone.body.conv1.weight, mode='fan_out', nonlinearity='relu')

# Get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained head with a new one that predicts NUM_CLASSES classes
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES)

# A single .to(device) on the model moves every submodule (backbone, RPN,
# ROI heads and the layers replaced above), so no per-module moves are needed
model = model.to(device)

# map_location lets a checkpoint that was saved on GPU load on a CPU-only machine
checkpoint_path = '../runs/SODA 01m/FasterRCNN/FasterRCNN_rgb_binary_student1/weights/best.pth'
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()

###############################
save_dir_path = os.path.join(save_dir, 'Faster R-CNN')
os.makedirs(save_dir_path, exist_ok=True)

# Minimum confidence for a detection to be drawn. Previously iou_threshold was
# passed positionally into this slot; the value (0.5) is unchanged.
detection_threshold = 0.5

# Iterate through all input tensors
for idx, (input_tensor, orig_img_np) in enumerate(zip(image_tensors, original_images_np)):

    # Model prediction (keywords keep the two thresholds from being mixed up)
    boxes, classes, labels, indices, scores = predict(
        input_tensor, model, device,
        detection_threshold=detection_threshold,
        iou_threshold=iou_threshold,
    )

    # Draw boxes on a copy of the original image
    result_image = draw_boxes(boxes, labels, classes, scores, orig_img_np.copy())

    # Save the result; cv2.imwrite reports failure via its return value
    save_path = os.path.join(save_dir_path, f'prediction_{idx}.jpg')
    if cv2.imwrite(save_path, cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR)):
        print(f"Saved: {save_path}")
    else:
        print(f"Failed to save: {save_path}")

Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_0.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_1.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_2.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_3.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_4.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_5.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_6.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_7.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_8.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_9.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_10.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_11.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_12.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_13.jpg
Saved: ../Assets/predictions/SODA/Faster R-CNN\prediction_14.jpg
Saved: ../Assets/predictions/SODA/F

#### SSD Model

In [7]:
# Build an SSD300-VGG16 model with the pretrained architecture; its weights are
# replaced by the fine-tuned checkpoint loaded further down.
#
# NOTE: this cell used to rebuild `model` as SSDLite320 (a pasted copy of the
# SSDLite cell) right after loading the SSD checkpoint, so the images written
# to the 'SSD' folder were really SSDLite predictions. That override has been
# removed so the SSD model prepared here is the one that gets evaluated.
weights = torchvision.models.detection.SSD300_VGG16_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.ssd300_vgg16(weights=weights)
model = pre_trained_model

# Modify the classification head
# SSD has one classification conv per feature map, so collect the input
# channels and the anchors per location for every feature map
in_channels = [layer.in_channels for layer in model.head.classification_head.module_list]
num_anchors = model.anchor_generator.num_anchors_per_location()

# Redefine the classification head to match the number of classes
model.head.classification_head = SSDClassificationHead(
    in_channels=in_channels,  # List of input channels for each feature map
    num_anchors=num_anchors,  # List of anchors per location for each feature map
    num_classes=NUM_CLASSES,  # Number of classes (including background)
)

# Move the complete model (including the new head) to the device once
model = model.to(device)

# map_location lets a checkpoint that was saved on GPU load on a CPU-only machine
checkpoint_path = '../runs/SODA 01m/SSD/SSD_rgb_binary_student1/weights/best.pth'
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()

###############################
save_dir_path = os.path.join(save_dir, 'SSD')
os.makedirs(save_dir_path, exist_ok=True)

# Minimum confidence for a detection to be drawn. Previously iou_threshold was
# passed positionally into this slot; the value (0.5) is unchanged.
detection_threshold = 0.5

# Iterate through all input tensors
for idx, (input_tensor, orig_img_np) in enumerate(zip(image_tensors, original_images_np)):

    # Model prediction (keywords keep the two thresholds from being mixed up)
    boxes, classes, labels, indices, scores = predict(
        input_tensor, model, device,
        detection_threshold=detection_threshold,
        iou_threshold=iou_threshold,
    )

    # Draw boxes on a copy of the original image
    result_image = draw_boxes(boxes, labels, classes, scores, orig_img_np.copy())

    # Save the result; cv2.imwrite reports failure via its return value
    save_path = os.path.join(save_dir_path, f'prediction_{idx}.jpg')
    if cv2.imwrite(save_path, cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR)):
        print(f"Saved: {save_path}")
    else:
        print(f"Failed to save: {save_path}")

Saved: ../Assets/predictions/SODA/SSD\prediction_0.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_1.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_2.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_3.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_4.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_5.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_6.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_7.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_8.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_9.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_10.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_11.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_12.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_13.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_14.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_15.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_16.jpg
Saved: ../Assets/predictions/SODA/SSD\prediction_17.jpg
Sa

#### SSDLite Model

In [8]:
# Build an SSDLite320-MobileNetV3 model with the pretrained architecture; its
# weights are replaced by the fine-tuned checkpoint loaded further down
weights = torchvision.models.detection.SSDLite320_MobileNet_V3_Large_Weights.DEFAULT
pre_trained_model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights=weights)
model = pre_trained_model

# Replace the first convolution of the MobileNetV3 backbone so it accepts
# NUM_CHANNELS input channels
model.backbone.features[0][0][0] = torch.nn.Conv2d(NUM_CHANNELS, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# Initialize the first convolutional layer's weights
torch.nn.init.kaiming_normal_(model.backbone.features[0][0][0].weight, mode='fan_out', nonlinearity='relu')

# Modify the classification head
# https://stackoverflow.com/questions/71094251/fine-tuning-ssd-lite-in-torchvision
# Forward a dummy image through the backbone to read the channel count of each
# feature map; the model must be on `device` first because tmp_img is
model = model.to(device)
tmp_img = torch.zeros((1, NUM_CHANNELS, 640, 640), dtype=torch.float32, device=device)
with torch.no_grad():
    features = model.backbone(tmp_img)

# Extract feature map channels
if isinstance(features, torch.Tensor):
    in_channels = [features.shape[1]]  # Single feature map
else:
    in_channels = [f.shape[1] for f in features.values()]  # Multiple feature maps

num_anchors = model.anchor_generator.num_anchors_per_location()

# Redefine the classification head to match the number of classes
model.head.classification_head = SSDLiteClassificationHead(
    in_channels=in_channels,  # List of input channels for each feature map
    num_anchors=num_anchors,  # List of anchors per location for each feature map
    num_classes=NUM_CLASSES,  # Number of classes (including background)
    norm_layer=partial(torch.nn.GroupNorm, 32),  # Normalization layer
)

# Set the number of classes in the model
model.num_classes = NUM_CLASSES
model.head.num_classes = NUM_CLASSES

# Move the complete model (including the new head) to the device
model = model.to(device)

# map_location lets a checkpoint that was saved on GPU load on a CPU-only machine
checkpoint_path = '../runs/SODA 01m/SSDLite/SSDLite_rgb_binary_student1/weights/best.pth'
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()

###############################
save_dir_path = os.path.join(save_dir, 'SSDLite')
os.makedirs(save_dir_path, exist_ok=True)

# Minimum confidence for a detection to be drawn. Previously iou_threshold was
# passed positionally into this slot; the value (0.5) is unchanged.
detection_threshold = 0.5

# Amount added to every score before it is drawn (negative = lowered).
# NOTE(review): the previous comment said "Add 0.2" while the code subtracted
# 0.02; the code's behavior is kept. The captions therefore do not show the
# raw model confidence - confirm this is intended or set the offset to 0.
score_display_offset = -0.02

# Iterate through all input tensors
for idx, (input_tensor, orig_img_np) in enumerate(zip(image_tensors, original_images_np)):

    # Model prediction (keywords keep the two thresholds from being mixed up)
    boxes, classes, labels, indices, scores = predict(
        input_tensor, model, device,
        detection_threshold=detection_threshold,
        iou_threshold=iou_threshold,
    )

    # Shift the displayed scores; np.array() also handles an empty result
    scores = np.array(scores) + score_display_offset

    # Draw boxes on a copy of the original image
    result_image = draw_boxes(boxes, labels, classes, scores, orig_img_np.copy())

    # Save the result; cv2.imwrite reports failure via its return value
    save_path = os.path.join(save_dir_path, f'prediction_{idx}.jpg')
    if cv2.imwrite(save_path, cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR)):
        print(f"Saved: {save_path}")
    else:
        print(f"Failed to save: {save_path}")

Saved: ../Assets/predictions/SODA/SSDLite\prediction_0.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_1.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_2.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_3.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_4.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_5.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_6.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_7.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_8.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_9.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_10.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_11.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_12.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_13.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_14.jpg
Saved: ../Assets/predictions/SODA/SSDLite\prediction_15.jpg
Saved: ../Assets/predictions/SODA/SSDLite\predicti